In [54]:
# Import necessary libraries
import pickle
import warnings

import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [55]:
# Read the symptom spreadsheet into a DataFrame.
# The workbook is expected to sit next to the notebook and to hold the
# feature columns together with the target column.
DATA_FILE = 'Mental disorder symptoms.xlsx'
df = pd.read_excel(DATA_FILE)

In [56]:
# Separate the target column ('Disorder') from the remaining feature columns.
y = df['Disorder']
X = df.drop(columns='Disorder')

In [57]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [58]:
# Fit a logistic-regression baseline and predict on the held-out split.
# `multi_class` is deliberately not passed: it is deprecated since
# scikit-learn 1.5 (FutureWarning) and scheduled for removal. With the lbfgs
# solver the default already fits a multinomial model when the target has
# three or more classes.
# NOTE(review): assumes 'Disorder' has more than two classes - confirm.
log_reg = LogisticRegression(solver='lbfgs')
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

In [59]:
# Decision tree: build with a fixed seed, fit on the training split,
# then predict the held-out rows. `fit` returns the estimator itself.
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_tree = decision_tree.predict(X_test)

In [60]:
# Random forest of 100 trees with a fixed seed: fit on the training split,
# then predict the held-out rows. `fit` returns the estimator itself.
random_forest = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_forest = random_forest.predict(X_test)

In [61]:
# Linear-kernel support vector classifier: fit on the training split,
# then predict the held-out rows. `fit` returns the estimator itself.
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [62]:
# Gradient boosting with a fixed seed: fit on the training split,
# then predict the held-out rows. `fit` returns the estimator itself.
gradient_boosting = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred_gb = gradient_boosting.predict(X_test)

In [1]:
# Function to evaluate and print metrics
def evaluate_model(y_true, y_pred, model_name):
    """Print the classification report and accuracy for one set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.
        model_name: Display name used as the prefix of the printed lines.

    Returns:
        The value of ``accuracy_score(y_true, y_pred)``, so callers can
        collect and compare results instead of only reading the printout.
    """
    # accuracy_score comes from sklearn.metrics and must be imported with the
    # other metrics; without that import this call raises NameError.
    accuracy = accuracy_score(y_true, y_pred)
    print(f"\n{model_name} Classification Report:\n", classification_report(y_true, y_pred))
    print(f"{model_name} Accuracy:", accuracy)
    return accuracy

In [64]:
# Map a display name to each fitted estimator so they can be handled
# (e.g. pickled) together. Insertion order is preserved.
models = dict([
    ('Logistic Regression', log_reg),
    ('Decision Tree', decision_tree),
    ('Random Forest', random_forest),
    ('SVM', svm),
    ('Gradient Boosting', gradient_boosting),
])

In [65]:
# Pickle every estimator to '<name>_model.pkl', where <name> is the display
# name lower-cased with spaces replaced by underscores.
for model_name, model in models.items():
    pickle_path = f'{model_name.replace(" ", "_").lower()}_model.pkl'
    with open(pickle_path, 'wb') as pkl_file:
        pickle.dump(model, pkl_file)
    print(f"{model_name} model saved as {pickle_path}")

Logistic Regression model saved as logistic_regression_model.pkl
Decision Tree model saved as decision_tree_model.pkl
Random Forest model saved as random_forest_model.pkl
SVM model saved as svm_model.pkl
Gradient Boosting model saved as gradient_boosting_model.pkl


In [66]:
from sklearn.model_selection import cross_val_score

# Example: 5-fold cross-validation of logistic regression on the training split.
# `multi_class` is deliberately not passed: it is deprecated since
# scikit-learn 1.5 and scheduled for removal; with the lbfgs solver the default
# already fits a multinomial model when the target has three or more classes.
# NOTE(review): assumes 'Disorder' has more than two classes - confirm.
log_reg_cv_scores = cross_val_score(
    LogisticRegression(solver='lbfgs'),
    X_train,
    y_train,
    cv=5,
)
print("Logistic Regression Cross-Validation Scores:", log_reg_cv_scores)
print("Mean Cross-Validation Score:", log_reg_cv_scores.mean())


Logistic Regression Cross-Validation Scores: [0.97058824 0.99019608 0.94117647 0.98039216 0.96039604]
Mean Cross-Validation Score: 0.9685497961560863


In [67]:
# Define a function to evaluate and cross-validate models
def evaluate_model_with_cv(model, X_train, y_train, model_name, cv=5):
    """Cross-validate ``model`` on the training data and print the scores.

    Args:
        model: Unfitted estimator passed to ``cross_val_score``.
        X_train: Training features.
        y_train: Training labels.
        model_name: Display name used in the printed lines.
        cv: Cross-validation setting forwarded to ``cross_val_score``.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The per-fold scores returned by ``cross_val_score``, so callers can
        compare models programmatically as well as read the printout.
    """
    cv_scores = cross_val_score(model, X_train, y_train, cv=cv)
    print(f"{model_name} Cross-Validation Scores: {cv_scores}")
    print(f"Mean Cross-Validation Score for {model_name}: {cv_scores.mean():.4f}")
    return cv_scores

In [68]:
# Fresh, unfitted estimators keyed by display name, used for cross-validation
# and for the final fit. This rebinds `models`, replacing any earlier mapping.
# `multi_class` is deliberately not passed to LogisticRegression: it is
# deprecated since scikit-learn 1.5 and scheduled for removal; with the lbfgs
# solver the default already fits a multinomial model when the target has
# three or more classes.
# NOTE(review): assumes 'Disorder' has more than two classes - confirm.
models = {
    'Logistic Regression': LogisticRegression(solver='lbfgs'),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(kernel='linear', random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

In [69]:
# Install a global "ignore" filter for warnings, then run 5-fold
# cross-validation for every candidate estimator.
warnings.filterwarnings("ignore")
for model_name in models:
    model = models[model_name]
    evaluate_model_with_cv(model, X_train, y_train, model_name)

Logistic Regression Cross-Validation Scores: [0.97058824 0.99019608 0.94117647 0.98039216 0.96039604]
Mean Cross-Validation Score for Logistic Regression: 0.9685
Decision Tree Cross-Validation Scores: [0.97058824 0.99019608 0.98039216 1.         0.99009901]
Mean Cross-Validation Score for Decision Tree: 0.9863
Random Forest Cross-Validation Scores: [0.97058824 0.99019608 0.96078431 1.         0.99009901]
Mean Cross-Validation Score for Random Forest: 0.9823
SVM Cross-Validation Scores: [0.97058824 1.         0.94117647 0.98039216 0.96039604]
Mean Cross-Validation Score for SVM: 0.9705
Gradient Boosting Cross-Validation Scores: [0.97058824 0.99019608 0.98039216 0.99019608 0.99009901]
Mean Cross-Validation Score for Gradient Boosting: 0.9843


In [70]:
warnings.filterwarnings("ignore")
# Fit every candidate on the training split and report its performance on the
# held-out test split. Previously the predictions were computed and then
# discarded, so nothing was actually evaluated.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    evaluate_model(y_test, y_pred, model_name)

In [71]:
warnings.filterwarnings("ignore")
# Predict the disorder for one hand-written example.
# The values must follow the column order of X_train; replace them with real
# inputs. Wrapping them in a DataFrame with the training column names keeps
# the input consistent with what the estimators were fitted on.
example = pd.DataFrame(
    [[29,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0]],
    columns=X_train.columns,
)
# Select the estimator explicitly instead of relying on the `model` variable
# left over from an earlier loop (which happened to be the last dict entry).
y_pred = models['Gradient Boosting'].predict(example)
y_pred

array(['sleeping disorder'], dtype=object)

In [72]:
# Save the trained models using pickle.
# Each estimator is written to '<name>_model.pkl', where <name> is the display
# name lower-cased with spaces replaced by underscores. Existing files with
# the same name are overwritten.
for model_name, model in models.items():
    pickle_path = f'{model_name.replace(" ", "_").lower()}_model.pkl'
    with open(pickle_path, 'wb') as file:
        # The dump must happen inside the `with` block, while the file is open.
        pickle.dump(model, file)
    print(f"{model_name} model saved as {pickle_path}")

Logistic Regression model saved as logistic_regression_model.pkl
Decision Tree model saved as decision_tree_model.pkl
Random Forest model saved as random_forest_model.pkl
SVM model saved as svm_model.pkl
Gradient Boosting model saved as gradient_boosting_model.pkl


In [73]:
# Predict the disorder for one hand-written example with every fitted model.
# The values must follow the column order of X_train; replace them with real
# inputs. Wrapping them in a DataFrame with the training column names keeps
# the input consistent with what the estimators were fitted on.
example = pd.DataFrame(
    [[29,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0]],
    columns=X_train.columns,
)
for model_name, model in models.items():
    y_pred = model.predict(example)
    # `predict` returns an array with one entry per input row; print the single
    # label itself rather than concatenating a string with the array, which
    # only works for object-dtype label arrays.
    print(f"{model_name}: {y_pred[0]}")

['Logistic Regression:sleeping disorder']
['Decision Tree:sleeping disorder']
['Random Forest:sleeping disorder']
['SVM:sleeping disorder']
['Gradient Boosting:sleeping disorder']
