In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [4]:
# Read the training table from a CSV path relative to the working directory
df = pd.read_csv(filepath_or_buffer="Training_final_v1.0.csv")

In [5]:
# 'result' is the prediction target; every other column stays in the feature frame
y = df['result']
X = df.drop(columns='result')

In [6]:
# Feature groups, split by the preprocessing each one receives.
# Columns of X that appear in neither list never reach the model
# (ColumnTransformer's documented default is remainder='drop').
categorical_features = ['season_x', 'team_x', 'opp_team_name']
numerical_features = [
    'assists', 'bonus', 'bps', 'clean_sheets', 'creativity',
    'goals_conceded', 'goals_scored', 'ict_index', 'influence', 'minutes',
    'own_goals', 'penalties_missed', 'penalties_saved', 'red_cards', 'saves',
    'selected', 'team_a_score', 'team_h_score', 'threat', 'total_points',
    'transfers_balance', 'transfers_in', 'transfers_out', 'value',
    'yellow_cards', 'GW',
]
# NOTE(review): 'team_a_score' / 'team_h_score' look like final match scores.
# If 'result' is derived from them this is target leakage -- confirm against
# how the CSV was built before trusting the accuracy figures.

# Define preprocessing steps for categorical and numerical features.
# handle_unknown='ignore' lets categories that were absent from the training
# split be transformed at score time instead of raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numerical_transformer = StandardScaler()

# Combine preprocessing steps for all features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('num', numerical_transformer, numerical_features)
    ])

# Create a pipeline with preprocessing and the Random Forest Classifier.
# random_state is pinned so the forest's internal randomness is fixed and the
# reported accuracy is reproducible across runs, matching the seeded
# train/test split below.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Split the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model using the pipeline; the preprocessor is fitted on the
# training split only, as part of this call.
pipeline.fit(X_train, y_train)

# Evaluate the model on the held-out split
accuracy = pipeline.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.8679245283018868


In [7]:
# training with SVM
from sklearn.svm import SVC


In [8]:
# Rebind `pipeline` to a support-vector classifier stacked on the same
# preprocessor object that the earlier pipeline used.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC()),
])


In [9]:
# Fit the preprocessing steps and the classifier on the training split
pipeline.fit(X=X_train, y=y_train)


In [10]:
# Score the fitted pipeline on the held-out split and report it
accuracy = pipeline.score(X=X_test, y=y_test)
print("Accuracy:", accuracy)


Accuracy: 0.8702830188679245


In [11]:
# Training model with KNN
from sklearn.neighbors import KNeighborsClassifier


In [12]:
# Rebind `pipeline` to a k-nearest-neighbours classifier stacked on the same
# preprocessor object that the earlier pipelines used.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier()),
])


In [13]:
# Fit the preprocessing steps and the classifier on the training split
pipeline.fit(X=X_train, y=y_train)


In [14]:
# Score the fitted pipeline on the held-out split and report it
accuracy = pipeline.score(X=X_test, y=y_test)
print("Accuracy:", accuracy)


Accuracy: 0.8160377358490566
