<a href="https://colab.research.google.com/github/Keerthana8888/Driver-Alertness-Prediction-/blob/main/Code-File/Stay_Alert_Ford.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Approach To Data

1. Load the data and libraries
2. Data Preparation and Data Transformation
3. Data Visualization
4. Data Analysis
   1. Univariate
   2. Bivariate
   3. Multivariate
      1. Classification
         1. Logistic Regression
         2. Decision Tree
         3. Random Forest
         4. Boosting
5. Evaluate the model
6. Apply the model to test data
7. Evaluate results using the submission data

1. Dataset Name: Ford Stay Alert
2. Author: Keertthana R
3. Date: 23/02/2024
4. Version: 1


# Load Data And Libraries

In [None]:
#Standard Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Models Selection
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier , AdaBoostClassifier , GradientBoostingClassifier
from xgboost import XGBClassifier
# Evaluators
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix

**Extract the dataset zip file**

In [None]:
!unzip '/content/stayalert (4).zip'

In [None]:
# Load the training and test CSV files into DataFrames.
# NOTE(review): the absolute /content/ paths presumably match where the archive
# was extracted (Colab layout) — confirm before running elsewhere.
train = pd.read_csv('/content/fordTrain.csv')
test  = pd.read_csv('/content/fordTest.csv')

In [None]:
# Structural summary of the training frame (columns, dtypes, memory usage).
train.info()

In [None]:
# First rows of the training frame.
train.head()

In [None]:
# Skewness of each training column.
train.skew()

# Exploratory Data Analysis

In [None]:
# Descriptive statistics of the training columns.
train.describe()

In [None]:
# One stacked KDE plot per column, split by the IsAlert class.
# The columns listed in the guard below are skipped.
for i in train.columns:
  if i in ('TrialID', 'ObsNum', 'IsAlert', 'P8', 'V7', 'V9'):
    continue
  plt.figure(figsize=(5, 5))
  sns.kdeplot(data=train, x=i, hue='IsAlert', multiple='stack')
  plt.title(f"kdeplot for {i}")

## Univariate

In [None]:
# Distribution (histogram + KDE) of every column in the training frame.
# sns.displot is a figure-level function: it always creates its own figure,
# so calling plt.figure() beforehand only leaves an empty extra figure per
# column. The 5x5 size is requested through height/aspect instead.
for i in train:
    sns.displot(train[i], kde=True, height=5, aspect=1)
    # The displot figure is the current one, so the title lands on its axes.
    plt.title(f"displot for {i}")

## Bivariate

In [None]:
# Annotated heatmap of the pairwise correlations between the training columns,
# drawn on a 30x30 figure.
plt.figure(figsize=(30,30))
sns.heatmap(train.corr(), annot=True)

# Data Preprocessing

In [None]:
# Target: the IsAlert column.
y=train['IsAlert']
# Features: every column from position 3 onward.
x=train.iloc[:,3:]
# Guard against target leakage: the positional slice only excludes IsAlert
# when that column sits among the first three.
assert 'IsAlert' not in x.columns, "IsAlert leaked into the feature matrix"

# Data Splitting

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
# NOTE(review): rows are split at random; if observations sharing a TrialID are
# correlated, a split grouped by trial may give a more honest estimate — confirm.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 123)

In [None]:
# Report the dimensions of each split: both feature frames, then both label vectors.
for split in (x_train, x_test, y_train, y_test):
  print(split.shape)

# Multivariate

## Logistic Regression

In [None]:
# Fit a logistic regression on the training split, with the iteration cap set to 1000.
log_reg=LogisticRegression(max_iter=1000)
log_reg.fit(x_train, y_train)
# Predicted labels for both splits, evaluated in the cells that follow.
y_hat_train_logreg=log_reg.predict(x_train)
y_hat_test_logreg=log_reg.predict(x_test)

In [None]:
def model_eval(actual, predicted):
  """Print accuracy, confusion matrix and classification report for a prediction.

  Args:
    actual: ground-truth labels.
    predicted: labels produced by a model for the same rows.

  Returns:
    None. The three metrics are written to standard output.
  """
  # Compute everything first so nothing is printed if a metric call fails.
  matrix = confusion_matrix(actual, predicted)
  accuracy = round(accuracy_score(actual, predicted), 2)
  report = classification_report(actual, predicted)
  print('The Accuracy of the model is: ', accuracy)
  print(matrix, report, sep='\n')

In [None]:
# Logistic regression: metrics on the training split.
model_eval(y_train, y_hat_train_logreg)

In [None]:
# Logistic regression: metrics on the held-out split.
model_eval(y_test, y_hat_test_logreg)

## Decision Tree

In [None]:
# Seed the tree so that a fresh run of the notebook reproduces the same fitted
# model and metrics (the seed matches the one used for the train/test split).
dtree = DecisionTreeClassifier(random_state=123)
dtree.fit(x_train, y_train)

In [None]:
# Decision tree predictions for both splits.
y_hat_train_dtree = dtree.predict(x_train)
y_hat_test_dtree = dtree.predict(x_test)

In [None]:
# Decision tree: metrics on the training split.
model_eval(y_train , y_hat_train_dtree)

In [None]:
# Decision tree: metrics on the held-out split.
model_eval(y_test , y_hat_test_dtree)

## Random Forest

In [None]:
# Seed the forest: this model feeds the feature-importance table and the final
# test-set predictions, so its bootstrap/feature sampling must be repeatable
# (the seed matches the one used for the train/test split).
rf=RandomForestClassifier(random_state=123)
rf.fit(x_train, y_train)

In [None]:
# Random forest predictions for both splits.
y_hat_train_rf=rf.predict(x_train)
y_hat_test_rf=rf.predict(x_test)

In [None]:
# Random forest: metrics on the training split.
model_eval(y_train, y_hat_train_rf)

In [None]:
# Random forest: metrics on the held-out split.
model_eval(y_test, y_hat_test_rf)

## Feature Importance

In [None]:
# Importance score of each feature according to the fitted random forest.
importances = rf.feature_importances_

In [None]:
# Pair every score with the name of its training column.
importances_df = pd.DataFrame({'feature': x_train.columns, 'importance': importances})

In [None]:
# Order the table from the highest to the lowest importance.
importances_df = importances_df.sort_values('importance', ascending=False)

In [None]:
# Display the ranked table.
importances_df

## AdaBoost

In [None]:
# Seed the booster so a fresh run of the notebook reproduces the same model
# (the seed matches the one used for the train/test split).
ada=AdaBoostClassifier(random_state=123)
ada.fit(x_train, y_train)

In [None]:
y_train_ada=ada.predict(x_train)
y_hat_test_ada=ada.predict(x_test)

In [None]:
model_eval(y_train, y_train_ada)

In [None]:
# AdaBoost: metrics on the held-out split.
model_eval(y_test, y_hat_test_ada)

## Gradient Boosting

In [None]:
# Gradient boosting with 150 boosting stages, seeded so a fresh run of the
# notebook reproduces the same model (seed matches the train/test split).
gb=GradientBoostingClassifier(n_estimators=150, random_state=123)
gb.fit(x_train, y_train)

In [None]:
# Gradient boosting predictions for both splits.
y_hat_train_gb=gb.predict(x_train)
y_hat_test_gb=gb.predict(x_test)

In [None]:
# Gradient boosting: metrics on the training split.
model_eval(y_train, y_hat_train_gb)

In [None]:
# Gradient boosting: metrics on the held-out split.
model_eval(y_test, y_hat_test_gb)

## XGBoost

In [None]:
# XGBoost classifier configured with n_estimators=250, fitted on the training split.
# NOTE(review): `xgb` here is the classifier instance, not the xgboost module
# that this name is often used as an alias for.
xgb=XGBClassifier(n_estimators=250)
xgb.fit(x_train, y_train)

In [None]:
# XGBoost predictions for both splits.
y_hat_train_xgb=xgb.predict(x_train)
y_hat_test_xgb=xgb.predict(x_test)

In [None]:
# XGBoost: metrics on the training split.
model_eval(y_train, y_hat_train_xgb)

In [None]:
# XGBoost: metrics on the held-out split.
model_eval(y_test, y_hat_test_xgb)

# Best Model

In [None]:
# Held-out accuracy of every fitted model, side by side.
# Each value is rounded to two decimals; round() without ndigits returns the
# nearest integer, which would collapse every accuracy to 0 or 1 and make the
# models impossible to compare.
accuracy_table=[
  ['Logistic Regression', round(accuracy_score(y_test , y_hat_test_logreg), 2)],
  ['Decision Tree ', round(accuracy_score(y_test , y_hat_test_dtree), 2)],
  ['Random Forest' , round(accuracy_score(y_test , y_hat_test_rf), 2)],
  ['Ada Boosting' , round(accuracy_score(y_test , y_hat_test_ada), 2)],
  ['Gradient Boosting' , round(accuracy_score(y_test , y_hat_test_gb), 2)],
  ['XG Boosting' , round(accuracy_score(y_test , y_hat_test_xgb), 2)],
]
df1 = pd.DataFrame(accuracy_table, columns = ['Model','Test_Accuracy'])
print(df1)

In [None]:
# Confusion matrix of the random forest on the held-out split.
# confusion_matrix puts true labels on rows and predicted labels on columns,
# in sorted label order (0 then 1) by default. The tick labels below list
# class 1 first, so the label order is passed explicitly to keep each count
# under the right heading.
# NOTE(review): assumes "yes" denotes IsAlert == 1 — confirm.
confu_metrics = metrics.confusion_matrix(y_test, y_hat_test_rf, labels=[1, 0])

df_cm = pd.DataFrame(confu_metrics, index = ["yes","no"],
                  columns = ["Predict 1","Predict 0"])
plt.figure(figsize = (7,5))
sns.heatmap(df_cm,annot=True,fmt='g' , cmap = 'magma' ,linewidths = 2 , linecolor = 'Black')

# Application on Test Dataset

In [None]:
# Structural summary of the test frame (columns, dtypes, memory usage).
test.info()

In [None]:
# First rows of the test frame.
test.head()

In [None]:
# Feature columns of the test frame, taken with the same positional slice
# (column 3 onward) that was applied to the training frame.
x_variable = test.iloc[: , 3:]

In [None]:
# Predict with the fitted random forest and write the labels into the test
# frame's IsAlert column (this modifies `test` in place).
test['IsAlert'] = rf.predict(x_variable)

In [None]:
# Peek at the first predicted labels.
test['IsAlert'].head()

# Comparing With Solution Set

In [None]:
# Load the solution file that the predictions are compared against below.
solution = pd.read_csv('/content/Solution.csv')

In [None]:
# First rows of the solution frame.
solution.head()

In [None]:
# Element-wise check: True where the solution's 'Prediction' value equals the
# label predicted for the same row of the test frame.
comparison = solution['Prediction'] == test['IsAlert']

In [None]:
# Summarise the agreement with the solution set instead of dumping the whole
# boolean Series: number of matching rows, total rows, and the match rate.
print(f"Matching predictions: {comparison.sum()} of {len(comparison)} ({comparison.mean():.2%})")