In [3]:
# 1. Imports
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [7]:
# 2. Load your dataset
# Resolve the workbook relative to the current user's home directory instead of
# hard-coding one person's absolute path; change DATA_PATH if the file lives elsewhere.
DATA_PATH = Path.home() / "Documents" / "demo dataset.xlsx"

# skiprows=1 drops the first sheet row (adjusted for export formatting) so the
# real header row is used for the column names.
df = pd.read_excel(DATA_PATH, skiprows=1)

# Header cells may carry stray leading/trailing whitespace; normalise them so the
# column lookups in later cells match exactly.
df.columns = df.columns.str.strip()


In [9]:
# Preview only the first rows rather than dumping the whole frame into the output.
df.head(10)

Unnamed: 0,age_group,gender,device_usage_frequency,primary_use,usual_mode,eye_strain_experience,mode_choice_factors,comfort_light,comfort_dark,eye_strain_light,eye_strain_dark,focus_light,focus_dark,frustration_light,frustration_dark,satisfaction_light,satisfaction_dark,performance_diff,daily_screen_time,preferred_mode
0,65 or older,Male,Occasionally,Other,Light mode,Both,Visual comfort,1,4,5,5,5,2,1,5,5,5,5,10+h,light
1,35–44,Other,Very frequently,Other,Light mode,,Visual comfort,1,3,5,3,4,2,5,5,4,2,4,<1h,light
2,45–54,Other,Very frequently,Entertainment,No preference,,Personal preference,4,4,1,3,1,4,2,3,1,5,2,<1h,dark
3,65 or older,Male,Very frequently,Other,Light mode,Both,Personal preference,4,2,5,5,5,1,4,5,1,3,4,10+h,light
4,25–34,Male,Rarely,Creative activities,Switch depending on context,,Personal preference,5,2,5,4,1,5,5,2,2,3,2,10+h,dark
5,45–54,Other,Occasionally,Other,Light mode,Both,Platform,5,3,5,3,3,5,5,3,4,3,5,7–9h,dark
6,45–54,Female,Occasionally,Entertainment,Dark mode,Dark mode,Visual comfort,3,1,3,1,1,2,5,5,2,3,2,4–6h,dark
7,65 or older,Prefer not to say,Very frequently,Entertainment,Dark mode,Both,Platform,4,2,4,1,1,1,5,1,2,1,3,<1h,dark
8,18–24,Male,Very frequently,Creative activities,No preference,Both,Personal preference,1,5,2,2,1,2,5,2,2,3,3,4–6h,dark
9,25–34,Prefer not to say,Rarely,Social media and communication,Dark mode,Both,Visual comfort,5,2,3,3,4,3,3,2,3,4,3,4–6h,light


In [70]:
# 3. Feature engineering: dark-minus-light gap for each rating scale
# A positive value means the dark-mode rating was the higher of the two.
for rating in ('comfort', 'focus', 'eye_strain'):
    df[f'{rating}_diff'] = df[f'{rating}_dark'] - df[f'{rating}_light']


In [72]:
# 4. Encode categorical variables
categorical_cols = [
    'age_group', 'gender', 'device_usage_frequency', 'primary_use',
    'usual_mode', 'eye_strain_experience', 'mode_choice_factors',
    'daily_screen_time'
]

# The encoding below overwrites columns of `df` in place. Running this cell a second
# time would silently fit every encoder on the integer codes instead of the original
# labels, and the encoders pickled later would no longer map real answers. Fail
# loudly instead of corrupting them.
already_encoded = [
    col for col in categorical_cols + ['preferred_mode']
    if pd.api.types.is_numeric_dtype(df[col])
]
if already_encoded:
    raise RuntimeError(
        f"Columns are already numeric: {already_encoded}. "
        "Re-run the data-loading and feature-engineering cells before encoding again."
    )

# NOTE(review): the preview of `df` shows blank cells in 'eye_strain_experience',
# which look like missing values. How LabelEncoder treats them depends on the
# installed scikit-learn version -- confirm, and consider an explicit fillna().

# One fitted encoder per column, kept so the same mapping can be reused at
# prediction time.
label_encoders = {}

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Encode the target separately
target_encoder = LabelEncoder()
# LabelEncoder sorts its classes, so the codes are dark=0, light=1
# (this matches the row order of the classification reports below).
df['preferred_mode'] = target_encoder.fit_transform(df['preferred_mode'])



In [74]:
# 5. Build the 17-column feature matrix and the target vector

# Encoded survey answers (8 columns)
survey_features = [
    'age_group',
    'gender',
    'device_usage_frequency',
    'primary_use',
    'usual_mode',
    'eye_strain_experience',
    'mode_choice_factors',
    'daily_screen_time',
]

# Per-mode ratings (6 columns)
rating_features = [
    'comfort_light', 'comfort_dark',
    'eye_strain_light', 'eye_strain_dark',
    'focus_light', 'focus_dark',
]

# Engineered dark-minus-light differences (3 columns)
diff_features = ['comfort_diff', 'focus_diff', 'eye_strain_diff']

features_to_keep = survey_features + rating_features + diff_features

X = df.loc[:, features_to_keep]
y = df['preferred_mode']


In [76]:
# 6. Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts
 

In [78]:
# 7a. Random Forest: fit on the training split, then score the held-out rows
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(
    y_test,
    y_pred_rf,
    target_names=target_encoder.classes_,  # show the original labels, not the codes
)

print("🔍 Random Forest Results")
print("Accuracy:", rf_accuracy)
print("\nClassification Report:\n", rf_report)


🔍 Random Forest Results
Accuracy: 0.7

Classification Report:
               precision    recall  f1-score   support

        dark       0.62      1.00      0.77         5
       light       1.00      0.40      0.57         5

    accuracy                           0.70        10
   macro avg       0.81      0.70      0.67        10
weighted avg       0.81      0.70      0.67        10



In [80]:
# Persist the fitted Random Forest to disk (the cell output lists the written file)
joblib.dump(value=rf_model, filename="rf_mode_predictor.pkl")


['rf_mode_predictor.pkl']

In [82]:
# 7a. Random Forest: train, evaluate and save in a single cell
# (repeats the training and saving steps of the two cells above)
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predictions for the held-out rows
y_pred_rf = rf_model.predict(X_test)

forest_accuracy = accuracy_score(y_test, y_pred_rf)
forest_report = classification_report(
    y_test, y_pred_rf, target_names=target_encoder.classes_
)

print("🔍 Random Forest Results")
print("Accuracy:", forest_accuracy)
print("\nClassification Report:\n", forest_report)

# Write the fitted model to disk; left as the last expression so the path is displayed
joblib.dump(value=rf_model, filename="rf_mode_predictor.pkl")



🔍 Random Forest Results
Accuracy: 0.7

Classification Report:
               precision    recall  f1-score   support

        dark       0.62      1.00      0.77         5
       light       1.00      0.40      0.57         5

    accuracy                           0.70        10
   macro avg       0.81      0.70      0.67        10
weighted avg       0.81      0.70      0.67        10



['rf_mode_predictor.pkl']

In [84]:
# 7b. Logistic Regression: same split and the same evaluation as the Random Forest
# max_iter is raised to 1000 to give the solver more iterations to converge.
logreg_model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
y_pred_logreg = logreg_model.predict(X_test)

logreg_accuracy = accuracy_score(y_test, y_pred_logreg)
logreg_report = classification_report(
    y_test,
    y_pred_logreg,
    target_names=target_encoder.classes_,
)

print("\n🧠 Logistic Regression Results")
print("Accuracy:", logreg_accuracy)
print("\nClassification Report:\n", logreg_report)

# Persist the fitted model; left as the last expression so the path is displayed
joblib.dump(value=logreg_model, filename="logreg_mode_predictor.pkl")



🧠 Logistic Regression Results
Accuracy: 0.7

Classification Report:
               precision    recall  f1-score   support

        dark       0.67      0.80      0.73         5
       light       0.75      0.60      0.67         5

    accuracy                           0.70        10
   macro avg       0.71      0.70      0.70        10
weighted avg       0.71      0.70      0.70        10



['logreg_mode_predictor.pkl']

In [86]:

# 8. Persist the fitted encoders next to the models so predictions can reuse the
#    same label <-> code mapping
joblib.dump(value=target_encoder, filename="target_encoder.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")

['label_encoders.pkl']

In [88]:
import joblib

# Write the Logistic Regression model again (the 7b cell already dumps this file)
joblib.dump(value=logreg_model, filename="logreg_mode_predictor.pkl")

# Write both encoder artifacts again (the step 8 cell already dumps these files)
joblib.dump(value=target_encoder, filename="target_encoder.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")


['label_encoders.pkl']