# Phase 2 — Baseline Models

### Importing Libraries

In [87]:
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score , confusion_matrix , classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


### Loading the Dataset

In [88]:
# loading data
train_df = pd.read_csv("train.csv")
val_df = pd.read_csv("val.csv")
test_df = pd.read_csv("test.csv")

# takeing a peak at data 
train_df.head()

# turning them into matreses 
X_train = train_df[[f"p{i}" for i in range(1, 43)] + ["turn"]].values.astype("int")
y_train = train_df["label_move_col"].values.astype("int")

X_val = val_df[[f"p{i}" for i in range(1, 43)] + ["turn"]].values.astype("int")
y_val = val_df["label_move_col"].values.astype("int")

X_test = test_df[[f"p{i}" for i in range(1, 43)] + ["turn"]].values.astype("int")



### Standardization of Features

In [89]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.fit_transform(X_val)
X_test_scaled = scaler.fit_transform(X_test)

train_df["label_move_col"].value_counts().sort_index()

label_move_col
0     7136
1     4111
2     4923
3    17653
4     4923
5     4111
6     7136
Name: count, dtype: int64

# Logistic Regression Model
### Training and Evaluation

In [90]:
# training logistic regression

Logistic_regression_model =  LogisticRegression(max_iter = 3000 , solver ="lbfgs" ,class_weight="balanced" , C=1 ,random_state=42 )

Logistic_regression_model.fit(X_train_scaled , y_train)

# predict on validation data
y_val_pred = Logistic_regression_model.predict(X_val_scaled)

#accuracy
val_acc = accuracy_score(y_val , y_val_pred)
print("validation accuracy: ", val_acc)

# confusion matrix
print("\nconfusion matrix: \n", confusion_matrix(y_val , y_val_pred))

validation accuracy:  0.17672173751624282

confusion matrix: 
 [[194 208 259 185 249 219 136]
 [ 83 207 169  85 133 148 100]
 [ 90 133 281 123 175 137  94]
 [364 575 769 542 769 575 364]
 [ 94 137 175 123 281 133  90]
 [100 148 133  85 169 207  83]
 [138 219 249 185 259 208 192]]


# Decision Tree Model
### Training and Evaluation



In [91]:
# training decision tree
Decision_tree_model = DecisionTreeClassifier(random_state=42, class_weight="balanced")
Decision_tree_model.fit(X_train_scaled, y_train)

# predict on validation data
y_val_pred = Decision_tree_model.predict(X_val_scaled)

# accuracy
val_acc = accuracy_score(y_val, y_val_pred)
print("Validation accuracy: ", val_acc)

# confusion matrix
print("\nconfusion matrix: \n", confusion_matrix(y_val, y_val_pred))


Validation accuracy:  0.48422127343604976

confusion matrix: 
 [[ 515  110  122  294  118   68  223]
 [ 103  359   72  155   79   72   85]
 [ 107   63  421  175   80   66  121]
 [ 280  183  193 2619  189  193  301]
 [ 135   68   73  172  425   60  100]
 [  93   68   78  157   65  357  107]
 [ 229   89  104  299   99  109  521]]


# Random Forest Model
### Training and Evaluation

In [92]:
#  Training the Random Forest model
rf = RandomForestClassifier(n_estimators=300, random_state=42, class_weight="balanced")
rf.fit(X_train_scaled, y_train)

# predict on validation data
y_val_pred = rf.predict(X_val_scaled)

# accuracy
val_acc = accuracy_score(y_val, y_val_pred)
print("Validation accuracy:", val_acc)

# confusion matrix
print("\nconfusion matrix: \n", confusion_matrix(y_val, y_val_pred))


Validation accuracy: 0.589660293298682

confusion matrix: 
 [[ 657   53   63  400   72   42  163]
 [  87  379   41  261   51   43   63]
 [  90   33  447  268   38   42  115]
 [ 138   55   80 3412   72   54  147]
 [ 112   37   44  269  453   31   87]
 [  64   43   54  265   37  375   87]
 [ 164   42   77  413   71   53  630]]


###  Final Prediction with the Best Model: Random Forest

In [93]:
# predict on test data
y_test_pred = rf.predict(X_test_scaled)

Prediction = pd.DataFrame({"id": range( 1 , len(y_test_pred) +1 ), "label_move_col" : y_test_pred })
Prediction.to_csv("prediction.csv" , index = False)