In [1]:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import joblib

# Load the dataset
data = pd.read_csv("ez_data.csv")

# Separate features (X) and target variable (y)
X = data.drop(columns=["Experience Level"])
y = data["Experience Level"]

# Encoding categorical features
label_encoder = LabelEncoder()
for column in X.columns:
    if X[column].dtype == 'object':
        X[column] = label_encoder.fit_transform(X[column])

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training level prediction model
level_classifier = RandomForestClassifier()
level_classifier.fit(X_train, y_train)

# Save the trained model and label encoder
joblib.dump(level_classifier, 'trained_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

# Making predictions on the test set
y_pred = level_classifier.predict(X_test)

# Evaluating model performance
accuracy = accuracy_score(y_test, y_pred)
print("Level Prediction Accuracy:", accuracy)
print("Classification Report for Level Prediction:")
print(classification_report(y_test, y_pred))

# Example usage of the trained model for prediction
new_data = pd.DataFrame({"Years of Experience": [5], "Familiarity with Concepts": ["Low"]})
for column in new_data.columns:
    if new_data[column].dtype == 'object':
        new_data[column] = label_encoder.transform(new_data[column])

predicted_level = level_classifier.predict(new_data)
print("Predicted Level for new data:", predicted_level[0])


Level Prediction Accuracy: 0.5
Classification Report for Level Prediction:
              precision    recall  f1-score   support

    Advanced       1.00      1.00      1.00         1
    Beginner       0.00      0.00      0.00         0
Intermediate       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.33      0.33      0.33         2
weighted avg       0.50      0.50      0.50         2

Predicted Level for new data: Advanced


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
level_classifier = joblib.load('trained_model.pkl')
label_encoder = joblib.load('label_encoder.pkl')
import sys
# Receive messages from Node.js
for line in sys.stdin:
    if line.strip() == 'hello':
        print('Received hello message from Node.js')
    else:
        # Received new data from Node.js
        new_data_json = line.strip()
        new_data = json.loads(new_data_json)

        # Convert new data to DataFrame
        new_data_df = pd.DataFrame(new_data, index=[0])

        # Encode categorical features
        for column in new_data_df.columns:
            if new_data_df[column].dtype == 'object':
                new_data_df[column] = label_encoder.transform(new_data_df[column])

        # Make predictions for the new data
        predicted_level = level_classifier.predict(new_data_df)
        print("Predicted Level for new data:", predicted_level[0])

        # Flush stdout to ensure message is sent immediately
        sys.stdout.flush()


KeyboardInterrupt: 