In [2]:
import pandas as pd

# Read the match workbook into a DataFrame.
# NOTE(review): this is an absolute path on one particular machine; the cell
# cannot run anywhere else until it points at a local copy of the workbook.
excel_path = r'C:\Users\ROG\Desktop\finalized ml.xlsx'
df = pd.read_excel(excel_path)

# Show the leading rows so the available column names can be inspected.
preview = df.head()
print(preview)


                        Date      Time    HometeamName   AwayTeamName  \
0   6 July 1960 (1960-07-06)  20:00:00          France     Yugoslavia   
1   6 July 1960 (1960-07-06)  21:30:00  Czechoslovakia   Soviet Union   
2   9 July 1960 (1960-07-09)  21:30:00  Czechoslovakia         France   
3  10 July 1960 (1960-07-10)  21:30:00    Soviet Union     Yugoslavia   
4  17 June 1964 (1964-06-17)  20:00:00           Spain        Hungary   

   HomeTeamGoals  AwayTeamGoals                 Stage       City  Attendance  \
0            4.0            5.0           Semi-finals      Paris     26370.0   
1            0.0            3.0           Semi-finals  Marseille     25184.0   
2            2.0            0.0  Third place play-off  Marseille      9438.0   
3            2.0            1.0                 Final      Paris     17966.0   
4            2.0            1.0           Semi-finals     Madrid     34713.0   

     Year  ...  HomeTeam Keyplayers  AwayTeam Keyplayers  HomeTeam Injuries  \
0

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier

# Define the target: the match outcome from the home team's point of view.
# The comparison is vectorised rather than applied row by row, and matches
# with a missing score are left unlabelled. A comparison against NaN is False
# in both directions, so the row-wise version recorded such matches as 'Draw'.
home_goals = df['HomeTeamGoals']
away_goals = df['AwayTeamGoals']
score_known = home_goals.notna() & away_goals.notna()

outcome = pd.Series('Draw', index=df.index, dtype=object)
outcome[home_goals > away_goals] = 'Win'
outcome[home_goals < away_goals] = 'Lose'
df['Result'] = outcome.where(score_known)  # NaN where the score is unknown

# Only matches with a recorded score can be modelled: the classifier rejects
# NaN feature values. When no score is missing this selects every row, in the
# original order, so the split below is unchanged.
# NOTE(review): the goal columns preview as floats (4.0, 5.0), which suggests
# the sheet contains blank scores - confirm against the workbook.
scored_matches = df.loc[score_known]

# Features: Home and Away Goals
# NOTE(review): 'Result' is derived from exactly these two columns, so the
# model is given the information that defines its own target.
features = scored_matches[['HomeTeamGoals', 'AwayTeamGoals']]
target = scored_matches['Result']

# Split the data into training and test sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Train kNN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Predictions on both partitions, used by the reporting code further down
y_train_pred = knn.predict(X_train)
y_test_pred = knn.predict(X_test)

# Evaluate performance
def evaluate_performance(y_true, y_pred, labels=('Win', 'Lose', 'Draw')):
    """Summarise classification quality for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    labels : sequence, optional
        Classes to report, in the row/column order of the confusion matrix.
        Defaults to ``('Win', 'Lose', 'Draw')``. The same label set is passed
        to every metric so the matrix and the scores describe the same
        classes.

    Returns
    -------
    tuple
        ``(cm, precision, recall, f1)``: the confusion matrix followed by the
        weighted-average precision, recall and F1 score.
    """
    label_order = list(labels)
    cm = confusion_matrix(y_true, y_pred, labels=label_order)

    # zero_division=0 keeps the value sklearn already substitutes for an
    # undefined per-class score (no predicted or no true samples), but without
    # emitting a warning for each metric.
    shared_options = dict(labels=label_order, average='weighted', zero_division=0)
    precision = precision_score(y_true, y_pred, **shared_options)
    recall = recall_score(y_true, y_pred, **shared_options)
    f1 = f1_score(y_true, y_pred, **shared_options)
    return cm, precision, recall, f1

# Score the classifier on the rows it was fitted on.
train_scores = evaluate_performance(y_train, y_train_pred)
cm_train, precision_train, recall_train, f1_train = train_scores
print("Training Performance:")
print("Confusion Matrix:\n", cm_train)
print(f"Precision: {precision_train}, Recall: {recall_train}, F1 Score: {f1_train}")

# Score it again on the held-out rows.
test_scores = evaluate_performance(y_test, y_test_pred)
cm_test, precision_test, recall_test, f1_test = test_scores
print("\nTest Performance:")
print("Confusion Matrix:\n", cm_test)
print(f"Precision: {precision_test}, Recall: {recall_test}, F1 Score: {f1_test}")


ModuleNotFoundError: No module named 'sklearn'

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import numpy as np
from sklearn.linear_model import LinearRegression

# Features and target for regression
# Keep only matches where both scores and the attendance are recorded:
# LinearRegression.fit raises on NaN in either the inputs or the target.
# When nothing is missing this keeps every row, in the original order, so the
# split below is unchanged.
# NOTE(review): these whole-number columns preview as floats (26370.0), which
# suggests the sheet contains blanks - confirm against the workbook.
regression_columns = ['HomeTeamGoals', 'AwayTeamGoals', 'Attendance']
complete_matches = df.dropna(subset=regression_columns)
features = complete_matches[['HomeTeamGoals', 'AwayTeamGoals']]
target = complete_matches['Attendance']

# Split the data (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Train a regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predictions on both partitions, used by the reporting code further down
y_train_pred = regressor.predict(X_train)
y_test_pred = regressor.predict(X_test)

# Evaluate regression performance
def evaluate_regression(y_true, y_pred):
    """Compute four error measures for a set of regression predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(mse, rmse, mape, r2)``: mean squared error, its square root, the
        mean absolute percentage error and the R^2 score.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_percentage_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

# Error measures on the rows the regressor was fitted on.
train_errors = evaluate_regression(y_train, y_train_pred)
mse_train, rmse_train, mape_train, r2_train = train_errors
print("\nTraining Regression Performance:")
print(f"MSE: {mse_train}, RMSE: {rmse_train}, MAPE: {mape_train}, R2: {r2_train}")

# The same measures on the held-out rows.
test_errors = evaluate_regression(y_test, y_test_pred)
mse_test, rmse_test, mape_test, r2_test = test_errors
print("\nTest Regression Performance:")
print(f"MSE: {mse_test}, RMSE: {rmse_test}, MAPE: {mape_test}, R2: {r2_test}")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Draw a reproducible toy data set: 20 points with both coordinates sampled
# uniformly between 1 and 10, each given a random 0/1 class label.
np.random.seed(0)
n_points = 20
X = np.random.uniform(1, 10, n_points)
Y = np.random.uniform(1, 10, n_points)
classes = np.random.choice([0, 1], size=n_points)

# Scatter plot: one series per class so each gets its own colour and legend entry.
for class_id, colour, legend_text in ((0, 'blue', 'Class 0'), (1, 'red', 'Class 1')):
    members = classes == class_id
    plt.scatter(X[members], Y[members], color=colour, label=legend_text)
plt.title('Training Data Scatter Plot')
plt.xlabel('Feature X')
plt.ylabel('Feature Y')
plt.legend()
plt.show()


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Generate test data: a regular grid in steps of 0.1 along both axes,
# flattened into one (x, y) row per grid node.
grid_axis = np.arange(0, 10.1, 0.1)
test_X, test_Y = np.meshgrid(grid_axis, grid_axis)
test_data = np.column_stack((test_X.ravel(), test_Y.ravel()))

# Train kNN (k=3) on the random training points.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(np.column_stack((X, Y)), classes)

# Predict a class for every grid node and fold the labels back into the
# grid's 2-D shape so they can be drawn as filled contours.
predicted_classes = knn.predict(test_data).reshape(test_X.shape)

# Plot the predicted regions with the training points drawn on top.
plt.contourf(test_X, test_Y, predicted_classes, alpha=0.3, cmap='coolwarm')
for class_id, colour, legend_text in ((0, 'blue', 'Class 0'), (1, 'red', 'Class 1')):
    members = classes == class_id
    plt.scatter(X[members], Y[members], color=colour, label=legend_text)
plt.title('kNN Classification Boundaries')
plt.xlabel('Feature X')
plt.ylabel('Feature Y')
plt.legend()
plt.show()


In [None]:
def plot_knn_with_different_k(X, Y, classes, test_data, test_X, test_Y, k_values):
    """Show one decision-region figure per neighbour count.

    For every ``k`` in ``k_values`` a ``KNeighborsClassifier`` is fitted on the
    points ``(X, Y)`` with labels ``classes`` and used to label ``test_data``.
    The labels are reshaped to ``test_X.shape`` and drawn as filled contours,
    with the class-0 and class-1 training points overlaid.

    Parameters
    ----------
    X, Y : array-like
        Coordinates of the training points.
    classes : array-like
        Label of each training point; points labelled 0 and 1 are plotted.
    test_data : array-like
        Query points to classify, one (x, y) row each.
    test_X, test_Y : array-like
        Grid coordinates passed to ``contourf``; ``test_X.shape`` is the shape
        the predictions are reshaped to.
    k_values : iterable of int
        Neighbour counts to visualise, one figure each.

    Returns
    -------
    None
        Each figure is displayed with ``plt.show()``.
    """
    class_styles = ((0, 'blue', 'Class 0'), (1, 'red', 'Class 1'))
    for k in k_values:
        model = KNeighborsClassifier(n_neighbors=k)
        model.fit(np.column_stack((X, Y)), classes)
        region_labels = model.predict(test_data)
        region_labels = region_labels.reshape(test_X.shape)

        plt.figure()
        plt.contourf(test_X, test_Y, region_labels, alpha=0.3, cmap='coolwarm')
        for class_id, colour, legend_text in class_styles:
            members = classes == class_id
            plt.scatter(X[members], Y[members], color=colour, label=legend_text)
        plt.title(f'kNN Classification Boundaries (k={k})')
        plt.xlabel('Feature X')
        plt.ylabel('Feature Y')
        plt.legend()
        plt.show()

# Example usage: compare the decision regions for four neighbour counts.
plot_knn_with_different_k(
    X,
    Y,
    classes,
    test_data=test_data,
    test_X=test_X,
    test_Y=test_Y,
    k_values=[1, 3, 5, 7],
)


In [4]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Training points as one (x, y) row each, built once and reused below.
points = np.c_[X, Y]

# An integer cv on a classifier means unshuffled StratifiedKFold. Building the
# splitter explicitly yields the same folds and lets their sizes be inspected.
cv_splitter = StratifiedKFold(n_splits=5)
smallest_train_fold = min(
    len(train_idx) for train_idx, _ in cv_splitter.split(points, classes)
)

# Define parameter grid
# A kNN model cannot look up more neighbours than it was fitted on. The
# original 1..19 range exceeds the training-fold size when only 20 samples are
# split five ways, so those candidates fail during scoring. Cap k at the
# smallest training fold; 19 stays as the upper limit for larger data sets.
max_k = min(19, smallest_train_fold)
param_grid = {'n_neighbors': np.arange(1, max_k + 1)}

# Initialize GridSearchCV
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=cv_splitter)
grid_search.fit(points, classes)

# Best parameters
best_k = grid_search.best_params_['n_neighbors']
print(f'Best k value: {best_k}')


ModuleNotFoundError: No module named 'sklearn'