In [1]:
from faker import Faker
import random
import pandas as pd

# Create a Faker object
fake = Faker()

# Seed both random sources so that Restart & Run All regenerates the same
# dataset (names, cities and scores) instead of a different one on every run.
RANDOM_SEED = 42
Faker.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

# Number of players and opponent teams
num_players = 10
num_opponent_teams = 5

# Generate player names
player_names = [fake.name() for _ in range(num_players)]

# Generate the opponent team names. A player's own team name is not generated
# here; it is derived from the last word of the player's name when a match row
# is built.
team_names = [fake.word() + ' ' + fake.word() + ' FC' for _ in range(num_opponent_teams)]
opponent_teams = list(team_names)

# Running total of goals scored by each player against each opponent team
player_goals = {player_name: {team: 0 for team in opponent_teams} for player_name in player_names}

# Rows of generated match data; filled by the match-generation loop
data = []

# Keep simulating matches until every (player, opponent) tally has reached the target
GOAL_TARGET = 50

while True:
    unfinished = any(
        tally < GOAL_TARGET
        for per_opponent in player_goals.values()
        for tally in per_opponent.values()
    )
    if not unfinished:
        break

    player_name = random.choice(player_names)
    opponent_team = random.choice(opponent_teams)

    # A pairing that already reached the target gets no further matches; draw again
    if player_goals[player_name][opponent_team] >= GOAL_TARGET:
        continue

    city = fake.city()
    goals_scored = random.randint(0, 5)
    # Any goal at all counts as a win for the player's team
    if goals_scored > 0:
        match_result = 'Win'
    else:
        match_result = 'Lose'

    # Add this match's goals to the running tally for the pairing
    player_goals[player_name][opponent_team] += goals_scored

    # The player's own team is named after the last word of the player's name
    own_team = player_name.split()[-1] + ' FC'
    data.append([player_name, own_team, opponent_team, city, goals_scored, match_result])

# Column labels, listed in the same order as the values inside each row of `data`
columns = [
    'Player Name',
    'Team Name',
    'Opponent Team Name',
    'City',
    'Goals Scored',
    'Match Result',
]

# One DataFrame row per generated match
df = pd.DataFrame(data=data, columns=columns)

# Persist the generated matches as CSV, leaving out the row index
df.to_csv('fake_soccer_dataset.csv', index=False)




In [None]:
# Preview the first five generated matches
df.head(n=5)

In [3]:
# Encode the match outcome as 1 (win) / 0 (loss).
# The guard keeps the cell safe to re-run: pushing an already-numeric column
# through the mapping again would replace every value with NaN.
if not pd.api.types.is_numeric_dtype(df['Match Result']):
    df['Match Result'] = df['Match Result'].map({"Win": 1, 'Lose': 0})

In [None]:
import numpy as np
import pandas as pd
from keras.models import Model
from keras.layers import Input, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder,LabelEncoder 

In [5]:
# The two prediction targets: goals in the match and the encoded match result
target_columns = ['Goals Scored', 'Match Result']
y_goal_scored, y_match_winner = (df[name] for name in target_columns)

# Every remaining column is an input feature
X = df.drop(columns=target_columns)

In [None]:
# Integer-encode each categorical column with its own LabelEncoder. The fitted
# encoders are kept in separate variables because the same mappings are applied
# again when a new sample is encoded for prediction.
encoder_player_name = LabelEncoder()
encoder_team_name = LabelEncoder()
encoder_opponent_team_name = LabelEncoder()
encoder_city = LabelEncoder()

X['Player Name'] = encoder_player_name.fit_transform(X['Player Name'])
X['Team Name'] = encoder_team_name.fit_transform(X['Team Name'])
X['Opponent Team Name'] = encoder_opponent_team_name.fit_transform(X['Opponent Team Name'])
X['City'] = encoder_city.fit_transform(X['City'])

# Convert categorical columns to one-hot encoding.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the current name first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
X_encoded = encoder.fit_transform(X[['Player Name', 'Team Name', 'Opponent Team Name', 'City']])

# Combine the one-hot encoded features with the remaining numeric features.
# The one-hot frame reuses X's index so the column-wise concat lines up row for row.
X_final = pd.concat(
    [
        pd.DataFrame(X_encoded, index=X.index),
        X.drop(['Player Name', 'Team Name', 'Opponent Team Name', 'City'], axis=1),
    ],
    axis=1,
)


In [None]:
# Preview the first five rows of the encoded feature matrix
X_final.head(n=5)

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
# train_test_split returns a (train, test) pair for each array passed in, in order.
split_parts = train_test_split(
    X_final,
    y_goal_scored,
    y_match_winner,
    test_size=0.2,
    random_state=42,
)
X_train, X_test = split_parts[0], split_parts[1]
y_goal_scored_train, y_goal_scored_test = split_parts[2], split_parts[3]
y_match_winner_train, y_match_winner_test = split_parts[4], split_parts[5]

In [9]:
def create_model(input_dim):
    """Build and compile a dense network with two output heads.

    A shared trunk of two 64-unit ReLU layers feeds two single-unit heads:
    ``output_goal_scored`` (no activation, trained with mean squared error)
    and ``output_match_winner`` (sigmoid, trained with binary cross-entropy).

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled model; its outputs are ordered
        ``[output_goal_scored, output_match_winner]``.
    """
    feature_input = Input(shape=(input_dim,))

    # Shared hidden layers used by both heads
    trunk = feature_input
    for _ in range(2):
        trunk = Dense(64, activation='relu')(trunk)

    # One head per prediction task, both reading the same trunk
    goal_head = Dense(1, name='output_goal_scored')(trunk)
    winner_head = Dense(1, activation='sigmoid', name='output_match_winner')(trunk)

    network = Model(inputs=feature_input, outputs=[goal_head, winner_head])
    # Losses are listed in the same order as the outputs
    network.compile(optimizer='adam', loss=['mean_squared_error', 'binary_crossentropy'])
    return network

In [None]:
# The network's input width equals the number of feature columns
input_dim = X_train.shape[1]
model = create_model(input_dim)

# Train both heads together; targets are listed in the same order as the model outputs
model.fit(
    x=X_train,
    y=[y_goal_scored_train, y_match_winner_train],
    batch_size=32,
    epochs=50,
)


In [None]:
# For a model with several outputs, evaluate() reports the combined loss first
# and the per-head losses after it, so labelling the values by position
# attaches the wrong description to each number. Ask for a name -> value
# mapping instead and label every entry by its metric name.
loss_by_name = model.evaluate(X_test, [y_goal_scored_test, y_match_winner_test], return_dict=True)
loss = list(loss_by_name.values())  # list form kept for code that indexes `loss`

loss_labels = {
    'loss': "Total loss (all heads combined)",
    'output_goal_scored_loss': "Loss for goal scored prediction",
    'output_match_winner_loss': "Loss for match winner prediction",
}
for metric_name, metric_value in loss_by_name.items():
    # Unknown metric names are printed as-is rather than dropped
    print(f"{loss_labels.get(metric_name, metric_name)}:", metric_value)


In [19]:
# Example match to score. The dataset is generated randomly, so these literal
# values are only usable when they actually occur in this run's data: the
# fitted label encoders raise on values they have not seen.
requested_sample = {
    'Player Name': 'Justin Porter',
    'Team Name': 'Porter FC',
    'Opponent Team Name': 'throughout technology FC',
    'City': 'Dustinstad',
}

if all(df[column].eq(value).any() for column, value in requested_sample.items()):
    new_data_point = pd.DataFrame({column: [value] for column, value in requested_sample.items()})
else:
    # Fall back to the first generated match so the prediction cells still run
    print("Example values not found in the generated data; using the first generated match instead.")
    new_data_point = df[list(requested_sample)].head(1).reset_index(drop=True)

In [None]:
# Encode the new data point with the encoders fitted on the training features.
# The codes go into a copy: overwriting `new_data_point` in place makes this
# cell fail when it is re-run, because the label encoders would then be handed
# integer codes instead of the original strings.
new_data_point_labels = new_data_point.copy()
new_data_point_labels['Player Name'] = encoder_player_name.transform(new_data_point['Player Name'])
new_data_point_labels['Team Name'] = encoder_team_name.transform(new_data_point['Team Name'])
new_data_point_labels['Opponent Team Name'] = encoder_opponent_team_name.transform(new_data_point['Opponent Team Name'])
new_data_point_labels['City'] = encoder_city.transform(new_data_point['City'])

# Convert categorical columns to one-hot encoding and concatenate with numeric features
new_data_point_encoded = encoder.transform(new_data_point_labels[['Player Name', 'Team Name', 'Opponent Team Name', 'City']])
new_data_point_final = pd.concat(
    [
        pd.DataFrame(new_data_point_encoded),
        new_data_point_labels.drop(['Player Name', 'Team Name', 'Opponent Team Name', 'City'], axis=1),
    ],
    axis=1,
)

# Make predictions; predict() returns one array per output head, in model output order
predicted_goal_scored, predicted_match_winner = model.predict(new_data_point_final)
print("Predicted goal scored:", predicted_goal_scored[0][0])
print("Predicted match winner probability:", predicted_match_winner[0][0])


In [39]:
# Rebuild the human-readable sample from the exact one-hot row that is fed to
# the model, so the name reported next to a prediction always matches the
# sample that was scored instead of a second, hand-copied set of literals.
# The one-hot encoder yields the label codes in the column order it was fitted
# on: player, team, opponent team, city.
_player_code, _team_code, _opponent_code, _city_code = (
    encoder.inverse_transform(new_data_point_final.to_numpy())[0].astype(int)
)
input_data = {
    'Player Name': str(encoder_player_name.inverse_transform([_player_code])[0]),
    'Team Name': str(encoder_team_name.inverse_transform([_team_code])[0]),
    'Opponent Team Name': str(encoder_opponent_team_name.inverse_transform([_opponent_code])[0]),
    'City': str(encoder_city.inverse_transform([_city_code])[0]),
}

In [None]:
# predict() returns one array per output head; only the second one (the
# sigmoid win probability) is needed here.
predicted_match_winner = model.predict(new_data_point_final)[1]

# Threshold the probability at 0.5. Calling int() on the raw value truncates
# every probability below 1.0 to 0, which always reported a loss.
result = int(predicted_match_winner[0][0] >= 0.5)
if result == 1:
    print("Yes,this team have high chance to win")
else:
    print("this team have chance to lose this match")

In [None]:
# predict() returns one array per output head; index 0 is the goal-count head.
# Binding only that array keeps `predicted_goal_scored` the same kind of object
# it is in the earlier prediction cell, rather than the whole list of outputs.
predicted_goal_scored = model.predict(new_data_point_final)[0]

# The goal head has no activation, so its output is unbounded: round to a whole
# number and clamp at zero so the message never shows "-0.0" or a negative count.
goal = max(0, int(round(float(predicted_goal_scored[0][0]))))

print(f"{input_data['Player Name']} can score {goal} goals")
