In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for input_path in (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
):
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Read the competition's training and test tables from the input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/playground-series-s4e2/train.csv',
        '/kaggle/input/playground-series-s4e2/test.csv',
    )
)

# Encode the target. `le` stays fitted on the target only, so `classes[i]`
# (and `le.inverse_transform`) map an encoded label back to its original name.
le = LabelEncoder()
train['NObeyesdad'] = le.fit_transform(train['NObeyesdad'])
classes = le.classes_  # Save class labels

# Encode the categorical features, each with its own encoder fitted on train.
for col in ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS']:
    col_encoder = LabelEncoder()
    train[col] = col_encoder.fit_transform(train[col])
    # Build the category -> code lookup once per column instead of calling
    # the encoder's transform() separately for every test row.
    code_by_category = {category: code for code, category in enumerate(col_encoder.classes_)}
    # Categories that never occur in the training column are encoded as -1.
    test[col] = test[col].map(code_by_category).fillna(-1).astype(int)

# Separate the encoded target from the predictors (the row id is not a feature),
# then hold out 20% of the rows as a validation split with a fixed seed.
y = train['NObeyesdad']
X = train.drop(['id', 'NObeyesdad'], axis='columns')
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Standardize features. The scaler is fitted on the training split only; the
# validation and test data are transformed with those same training statistics.
# Note that X_train / X_val are rebound to the scaler's output from here on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
# Select the test columns by the training frame's column names so the test
# matrix has exactly the training features in the training order, rather than
# relying on "everything except id" happening to line up.
test_data = scaler.transform(test[X.columns])

# Model training & evaluation
def train_evaluate_model(model, param_grid, name):
    """Tune `model` by grid search and report the winner's validation results.

    Runs a 5-fold, accuracy-scored `GridSearchCV` over `param_grid` on the
    module-level `X_train` / `y_train`, then prints the accuracy and the
    classification report of the best estimator on `X_val` / `y_val`.

    Args:
        model: Estimator to tune.
        param_grid: Parameter grid forwarded to `GridSearchCV`.
        name: Label used in the printed accuracy line.

    Returns:
        The search's `best_estimator_`.
    """
    search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1).fit(X_train, y_train)
    tuned = search.best_estimator_

    # Score the selected estimator on the held-out validation split.
    val_pred = tuned.predict(X_val)
    val_acc = accuracy_score(y_val, val_pred)
    print(f'{name} Best Accuracy: {val_acc:.4f}')
    print(classification_report(y_val, val_pred))
    return tuned

# Train models with hyperparameter tuning
# `multi_class` is intentionally not passed to LogisticRegression: the argument
# is deprecated in recent scikit-learn releases, and with the lbfgs solver the
# default already fits a multinomial model for a multiclass target.
log_reg = train_evaluate_model(LogisticRegression(solver='lbfgs', max_iter=500), {'C': [0.1, 1, 10]}, 'Logistic Regression')
lda = train_evaluate_model(LinearDiscriminantAnalysis(), {'solver': ['svd', 'lsqr']}, 'LDA')
qda = train_evaluate_model(QuadraticDiscriminantAnalysis(), {'reg_param': [0.0, 0.1, 0.5]}, 'QDA')
nb = train_evaluate_model(GaussianNB(), {'var_smoothing': [1e-9, 1e-8, 1e-7]}, 'Naive Bayes')
# NOTE(review): probability=True enables SVC's probability calibration, which adds
# training cost; the code shown here only calls predict(). Consider dropping it if
# predict_proba is not needed elsewhere.
svm = train_evaluate_model(SVC(kernel='linear', probability=True), {'C': [0.1, 1, 10]}, 'SVM')

# Generate test predictions and submission files
models = {'Logistic_Regression': log_reg, 'LDA': lda, 'QDA': qda, 'Naive_Bayes': nb, 'SVM': svm}

# Encoded predictions are positions in `classes`, so indexing the label array
# converts a whole prediction vector back to class names in one step. An
# out-of-range code raises instead of writing an invalid label to the file.
class_names = np.asarray(classes)
for name, model in models.items():
    predictions = class_names[model.predict(test_data)]
    submission = pd.DataFrame({'id': test['id'], 'NObeyesdad': predictions})
    # One file per model, named after the model's key in `models`.
    submission.to_csv(f'submission_{name}.csv', index=False)
    print(f'Submission file for {name} saved.')


Logistic Regression Best Accuracy: 0.8687
              precision    recall  f1-score   support

           0       0.88      0.95      0.91       510
           1       0.86      0.82      0.84       579
           2       0.84      0.84      0.84       588
           3       0.94      0.96      0.95       649
           4       1.00      1.00      1.00       830
           5       0.75      0.69      0.72       491
           6       0.71      0.72      0.72       505

    accuracy                           0.87      4152
   macro avg       0.85      0.85      0.85      4152
weighted avg       0.87      0.87      0.87      4152

LDA Best Accuracy: 0.8148
              precision    recall  f1-score   support

           0       0.83      0.90      0.86       510
           1       0.76      0.70      0.73       579
           2       0.79      0.76      0.77       588
           3       0.91      0.96      0.93       649
           4       0.99      0.99      0.99       830
          



Naive Bayes Best Accuracy: 0.6664
              precision    recall  f1-score   support

           0       0.69      0.84      0.76       510
           1       0.66      0.46      0.54       579
           2       0.40      0.65      0.49       588
           3       0.73      0.94      0.82       649
           4       0.95      0.99      0.97       830
           5       0.59      0.27      0.37       491
           6       0.53      0.25      0.34       505

    accuracy                           0.67      4152
   macro avg       0.65      0.63      0.61      4152
weighted avg       0.67      0.67      0.65      4152

SVM Best Accuracy: 0.8716
              precision    recall  f1-score   support

           0       0.88      0.95      0.91       510
           1       0.87      0.82      0.84       579
           2       0.85      0.84      0.84       588
           3       0.94      0.96      0.95       649
           4       1.00      0.99      1.00       830
           5      

In [14]:
import shutil

# Publish one model's predictions under the name `submission.csv`.
# Choose the model by name here instead of (un)commenting copy statements.
# Valid names are the suffixes of the per-model `submission_<name>.csv` files:
#   'Logistic_Regression', 'LDA', 'QDA', 'Naive_Bayes', 'SVM'
SUBMISSION_MODEL = 'SVM'

# Copy rather than rename, so the per-model file is kept alongside submission.csv.
shutil.copy(f'submission_{SUBMISSION_MODEL}.csv', 'submission.csv')

'submission.csv'