In [1]:
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, mean_squared_error
from imblearn.over_sampling import SMOTE

# Load the dataset
file_path = '/Users/michaelvincenttanedo/Downloads/tetris_data.csv'
data = pd.read_csv(file_path)

# Convert the 'state' column from string representation of list to actual list
data['state'] = data['state'].apply(literal_eval)

# Flatten the 'state' lists into separate columns
state_df = pd.DataFrame(data['state'].tolist())

# Encode the 'move' column to numerical values
label_encoder = LabelEncoder()
data['move_encoded'] = label_encoder.fit_transform(data['move'])

# Combine the state data with the other features
X = pd.concat([state_df, data[['score']]], axis=1)

# Ensure all column names are strings
X.columns = X.columns.astype(str)

y = data['move_encoded']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

# Initialize Logistic Regression with hyperparameter tuning using Grid Search
log_reg = LogisticRegression(max_iter=2000, random_state=42)
param_grid = {
    'C': [0.1, 1, 10, 100],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'solver': ['saga']
}

grid_search = GridSearchCV(log_reg, param_grid, cv=StratifiedKFold(5), scoring='f1_micro')
grid_search.fit(X_train_resampled, y_train_resampled)

# Predict on the test set
y_pred = grid_search.predict(X_test_scaled)

# Print classification report
print(classification_report(y_test, y_pred))

# Calculate and print Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


20 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/michaelvincenttanedo/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/michaelvincenttanedo/opt/anaconda3/lib/python3.8/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/michaelvincenttanedo/opt/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1179, in fit
    raise ValueError("l1_ratio must be specified when penalty is elasticnet.")
ValueError

              precision    recall  f1-score   support

           0       0.40      0.40      0.40         5
           1       0.17      0.20      0.18         5
           2       0.25      0.40      0.31         5
           3       0.00      0.00      0.00         5

    accuracy                           0.25        20
   macro avg       0.20      0.25      0.22        20
weighted avg       0.20      0.25      0.22        20

Mean Squared Error: 2.05


