In [None]:
# Fetch the latest World Cup match data. This requires the third-party
# `extract-wc-data` library to be installed first.
from ExtractWCData.get_latest_data import GetData
data = GetData()
# presumably a pandas DataFrame (it is used with .tail() and .to_csv below) — TODO confirm
df = data.get_data()

df.tail()

In [None]:
# Persist the fetched data; a later cell reloads this file with pd.read_csv.
# NOTE(review): the index is written as well (no index=False), which is why the
# reloaded frame carries an 'Unnamed: 0' column that later cells drop explicitly.
df.to_csv('latest_data.csv')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the CSV datasets used by this notebook.
# NOTE(review): in the cells visible here `world_cup` is only inspected
# (head and shape); it is not used for modelling.

world_cup = pd.read_csv("world_cup_2023.csv")
world_cup.head()

In [None]:
# Match results; later cells filter this frame into the model's training data.
results = pd.read_csv("results.csv")
results.tail()

In [None]:
# Reload the data that the earlier to_csv cell saved to 'latest_data.csv'.
latest = pd.read_csv('latest_data.csv')
latest.head()

In [None]:
# Report the dimensions of each loaded dataset.
for dataset_label, dataset in (
    ('World_Cup', world_cup),
    ('Results', results),
    ('Latest world cup', latest),
):
    print(f'{dataset_label} data shape : {dataset.shape}')

In [None]:
# Stack the freshly fetched matches under the existing results and renumber
# the rows 0..n-1 in the same step.
results = pd.concat([results, latest], axis=0, ignore_index=True)

results.tail()


In [None]:
# Row/column count after `latest` was appended to `results`.
results.shape

In [None]:
# List the available columns before some of them are dropped in the next cell.
results.columns

In [None]:
# Remove the columns that play no part in the rest of the notebook.
unused_columns = ['Date', 'Ground', 'Margin']
results = results.drop(columns=unused_columns)
results.head()

In [None]:
# World Cup teams used to filter the match results.
# Every entry needs its own comma: adjacent string literals are silently
# concatenated by Python, so a missing comma after 'New Zealand' used to produce
# the single bogus entry 'New ZealandSri Lanka' and neither team was matched.
# NOTE(review): 'Afghanistan' was previously spelled 'Afganistan'; confirm the
# spelling matches the team names used in results.csv.
world_cup_teams = [
    'England', 'India', 'Pakistan', 'South Africa', 'New Zealand',
    'Sri Lanka', 'Afghanistan', 'Bangladesh', 'Australia', 'Netherlands',
]

In [None]:
# Keep every match that involves a World Cup team, as either side or as the winner.
# A single OR-ed mask replaces the former concatenation of three filtered frames:
# those filters overlap, so the same match was repeated up to three times, and the
# repeats could end up on both sides of the later train/test split.
involves_wc_team = (
    results['Team_1'].isin(world_cup_teams)
    | results['Team_2'].isin(world_cup_teams)
    | results['Winner'].isin(world_cup_teams)
)

# .copy() gives an independent frame, so later column assignments on df_team
# do not act on a slice of `results`.
df_team = results[involves_wc_team].copy()
df_team.tail()


In [None]:
# Encode the outcome numerically, since a number is easier to model than a team name:
# 1 when Team_1 is the recorded winner, otherwise 2 (every row whose Winner is
# not Team_1 gets label 2).
team_1_won = df_team['Winner'] == df_team['Team_1']
df_team['Winning'] = np.where(team_1_won, 1, 2)
df_team.tail()

In [None]:
# Drop the leftover CSV index column and the raw winner name; the outcome is
# already captured by the numeric 'Winning' column.
df_team = df_team.drop(columns=['Unnamed: 0', 'Winner'])
df_team.head()

In [None]:
#So we transform data So we use Sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [None]:
# One-hot encode both team columns into Team_1_<name> / Team_2_<name> indicators.
df_team = pd.get_dummies(
    df_team,
    columns=['Team_1', 'Team_2'],
    prefix=['Team_1', 'Team_2'],
    dtype=int,
)
df_team.head()


In [None]:
# Target: the encoded match outcome. Features: every remaining column.
y = df_team['Winning']
x = df_team.drop(columns='Winning')

In [None]:
# Split the data into training and test sets: 20% of the rows are held out for
# testing, and the fixed random_state makes the split repeatable.
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2,random_state=34)

In [None]:
# Inspect the training features.
x_train

In [None]:
# Inspect the training labels (1 or 2, as encoded in the 'Winning' column).
y_train

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline


In [None]:
# Compare a few baseline classifiers on the held-out test set.
# The tree-based models are seeded (same seed as the train/test split) so the
# reported accuracies are reproducible from one run to the next; without a
# random_state they change on every execution.

classifiers = {
    'Random Forest': RandomForestClassifier(random_state=34),
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=34),
}

for name, clf in classifiers.items():
    # A one-step pipeline: fit/predict are simply forwarded to the classifier.
    pipeline = Pipeline([('classifier', clf)])

    pipeline.fit(x_train, y_train)

    # Predict the held-out rows
    y_pred = pipeline.predict(x_test)

    # Share of test rows whose predicted label equals the true label
    acc = accuracy_score(y_test, y_pred)

    print(f'{name}: ')
    print(f"Accuracy : {acc:.4f}")

In [None]:
# Model used for all predictions below. It is seeded so that retraining on the
# same split yields the same forest and therefore the same predicted winners.
rf = RandomForestClassifier(random_state=34)
rf.fit(x_train,y_train)

In [None]:
# Predicted labels (1 or 2) for the held-out test rows.
predictions=rf.predict(x_test)

In [None]:
# Translate the numeric test-set predictions back into readable side labels.
label_to_team = {1: 'Team_1', 2: 'Team_2'}

Winner = list(map(label_to_team.__getitem__, predictions))

print(Winner)

In [None]:
# Team rankings; later cells read the 'Team_name' and 'Team_ranking' columns.
rankings = pd.read_csv('Icc_ranking.csv')
rankings.head()

In [None]:
# Fixtures to predict; later cells read the 'Team_1' and 'Team_2' columns.
fixtures = pd.read_csv("Fixtures.csv")
fixtures.head()

In [None]:
pred_set = []

# Look up the ranking series once, then place each side's ranking right after
# the first column of the fixtures table.
rank_by_team = rankings.set_index('Team_name')['Team_ranking']
fixtures.insert(1, 'first_position', fixtures['Team_1'].map(rank_by_team))
fixtures.insert(2, 'second_position', fixtures['Team_2'].map(rank_by_team))

# Keep at most the first 80 fixtures.
fixtures = fixtures.iloc[:80]
fixtures.head()

In [None]:
# Build the prediction frame so that the better-ranked side (numerically smaller
# ranking) is always in 'Team_1'. When the comparison is False (including when a
# ranking is missing) the two sides are swapped.
#
# The frame is built in one vectorized step instead of appending to a list that
# was created in another cell: that version could not be re-run, because
# `pred_set` is rebound from a list to a DataFrame at the end of this cell.
team_1_ranked_higher = fixtures['first_position'] < fixtures['second_position']

pred_set = pd.DataFrame({
    'Team_1': np.where(team_1_ranked_higher, fixtures['Team_1'], fixtures['Team_2']),
    'Team_2': np.where(team_1_ranked_higher, fixtures['Team_2'], fixtures['Team_1']),
    'Winning_team': None,  # placeholder column; not filled in by this cell
})

pred_set.head()

In [None]:
# Keep an independent copy of the team names for printing results later.
# A plain assignment would only create a second name for the same DataFrame,
# so any in-place change to `pred_set` would also alter the "backup".
backup_pred_set = pred_set.copy()

In [None]:
# One-hot encode the fixtures the same way the training frame was encoded.
pred_set = pd.get_dummies(
    pred_set,
    columns=['Team_1', 'Team_2'],
    prefix=['Team_1', 'Team_2'],
    dtype=int,
)

# Training columns that did not appear among the fixtures.
missing_cols = set(df_team.columns).difference(pred_set.columns)

In [None]:
# Give the fixtures exactly the training feature layout: same columns in the same
# order, zero-filled where a training column is absent, and without the target.
feature_columns = df_team.columns.drop('Winning')
pred_set = pred_set.reindex(columns=feature_columns, fill_value=0)
pred_set.head()

In [None]:
# Predict every fixture and print the winner.
# Labels follow the training encoding of 'Winning': 1 means the side in 'Team_1'
# won, anything else means the side in 'Team_2' won. The previous version read
# the columns by position and printed the 'Team_2' side for label 1, i.e. it
# reported the opposite team, disagreeing with predict_single_match.
predictions = rf.predict(pred_set)

for side_1, side_2, predicted_label in zip(
    backup_pred_set['Team_1'], backup_pred_set['Team_2'], predictions
):
    print(side_1 + " Vs " + side_2)
    if predicted_label == 1:
        print('Winner : ' + side_1)
    else:
        print('Winner : ' + side_2)
    print("")

In [None]:
# Look at the latest match data again before summarising the winners.
latest.head()

In [None]:
# Discard the index column that came along when the CSV was read back.
latest = latest.drop(columns='Unnamed: 0')
latest.head()

In [None]:
# The four most frequent values of 'Winner' in the latest data, most frequent first.
win_counts = latest['Winner'].value_counts()
top_winners = win_counts.index[:4].tolist()

print(f"Top 4 teams : {top_winners}")

In [None]:
# Predict the single match results of future

def predict_single_match(model, rankings, team_1, team_2, feature_columns=None):
    """Print the predicted winner of a single match between two teams.

    A one-row frame is one-hot encoded and aligned to the training feature
    layout, the model is asked for a label, and the fixture and the winner are
    printed.

    Args:
        model: Fitted classifier exposing ``predict``. Label 1 is reported as a
            win for ``team_1``; any other label as a win for ``team_2``.
        rankings: Unused; kept so that existing calls keep working. The previous
            implementation looked up both teams' rankings but then selected only
            the training columns, so the ranking columns never reached the model
            (assuming the training frame has no 'first_position' /
            'second_position' columns, as is the case for the ``df_team`` built
            in this notebook).
        team_1: Team encoded in the ``Team_1_*`` indicator columns.
        team_2: Team encoded in the ``Team_2_*`` indicator columns.
        feature_columns: Column names of the training frame; a 'Winning' entry,
            if present, is ignored. Defaults to the notebook-level
            ``df_team.columns``.

    Returns:
        None. The result is printed.
    """
    if feature_columns is None:
        # Fall back to the training frame defined at notebook level.
        feature_columns = df_team.columns

    # The target column is not a model input.
    model_columns = [col for col in feature_columns if col != 'Winning']

    single_match_data = pd.DataFrame({'Team_1': [team_1], 'Team_2': [team_2]})

    # Apply the same one-hot encoding that was used for training.
    single_match_data = pd.get_dummies(
        single_match_data,
        prefix=['Team_1', 'Team_2'],
        columns=['Team_1', 'Team_2'],
        dtype=int,
    )

    # Match the training layout exactly: same columns, same order, zeros for
    # every indicator this match does not set. A team with no matching training
    # column therefore contributes an all-zero encoding for its side.
    single_match_data = single_match_data.reindex(columns=model_columns, fill_value=0)

    prediction = model.predict(single_match_data)

    print(f"{team_1} vs {team_2}")

    if prediction[0] == 1:
        print(f"Winner: {team_1}")
    else:
        print(f"Winner: {team_2}")

    print("")

Predicting the first semi-final:


In [None]:
# Prints the fixture and the winner predicted by the fitted random forest `rf`.
predict_single_match(rf, rankings, "India", "New Zealand")

Predicting the second semi-final:


In [None]:
# Prints the fixture and the winner predicted by the fitted random forest `rf`.
predict_single_match(rf, rankings, "Australia", "South Africa")

Predicting the final:


In [None]:
# NOTE(review): the finalists are hard-coded here rather than taken from the two
# semi-final predictions (predict_single_match only prints; it returns nothing),
# so update these names if the semi-final output changes.
predict_single_match(rf, rankings, "India", "South Africa")