# In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import numpy as np
import math

from sklearn.model_selection import train_test_split

import torch 
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

import matplotlib.pylab as plt
import seaborn as sns

# Program-wide settings and shared state

# HTTP headers sent with every request issued by create_embeddings().
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    'Referer': 'https://example.com'
}

# Module-level accumulators: load_matches() appends one entry per match to
# each of these lists and also returns them.
X_data = []
Y_data = []

# Selects the feature-building branch in data_transformation() and
# analyze_data(). The values those functions check for are 'team_sum_diff',
# 'flatten' and 'team_diff_flatten'.
transform = 'team_sum_diff'

def is_nan(value):
    """Return True when *value* converts to a float NaN, False otherwise.

    Anything ``float()`` rejects with ``TypeError`` or ``ValueError``
    (``None``, non-numeric strings, containers, ...) is reported as
    not-NaN instead of raising.
    """
    try:
        as_float = float(value)
    except (TypeError, ValueError):
        # Not convertible to a number, hence it cannot be a numeric NaN.
        return False
    return math.isnan(as_float)


def calcular_media(datos):
    """Replace every list of sample vectors in *datos* with its mean vector.

    Each value must be a non-empty list of vectors that support elementwise
    ``+`` and ``/`` (load_matches() stores NumPy row slices). The mean of
    each component is converted to ``float`` and rounded to two decimals.

    The dictionary is modified in place and the same object is returned.
    An empty sample list raises ``ZeroDivisionError``.
    """
    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for clave, muestras in datos.items():
        promedio = sum(muestras) / len(muestras)
        datos[clave] = [round(float(componente), 2) for componente in promedio]

    return datos


def _champion_url_slug(champion):
    """Turn a champion display name into the slug used in the stats URL."""
    return (
        champion.replace(' ', '')
        .replace("'", "")
        .replace(".", "")
        .replace("&Willump", "")
        .replace("Glasc", "")
        .replace("Wukong", "monkeyking")
        .lower()
    )


def _damage_percentage(soup, damage_kind, champion):
    """Read the percentage shown in the tooltip of one damage-type bar."""
    css_class = f'stacked_bar_area {damage_kind}_damage_area requireTooltip'
    bar = soup.find('div', {'class': css_class})
    tooltip = bar.get('tooltip') if bar is not None else None
    if tooltip is None:
        # Without this check a layout change or an unknown slug surfaces as
        # an opaque "'NoneType' object has no attribute ..." error.
        raise ValueError(
            f"No {damage_kind} damage tooltip found on the stats page for {champion!r}"
        )
    # The number is taken from the text preceding the first '%' sign.
    return float(tooltip.split('%')[0])


def create_embeddings(data_final):
    """Build a four-value embedding for every champion in *data_final*.

    For each champion the stats page on leagueofgraphs.com is downloaded and
    the physical / magic / true damage percentages are read from it. The
    embedding is the first stored statistic multiplied by each of the three
    percentages, followed by the second stored statistic, all rounded to two
    decimals.

    Args:
        data_final: mapping ``champion name -> [stat0, stat1]``. Presumably
            the averaged damage share and damage mitigated per minute that
            load_matches() produces; verify against the caller.

    Returns:
        dict mapping each champion name to a list of four floats.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        ValueError: if a damage bar or its tooltip is missing from the page.
    """
    champ_embeddings = {}
    for champion, stats in data_final.items():
        url = f"https://www.leagueofgraphs.com/champions/stats/{_champion_url_slug(champion)}/master"
        # A timeout keeps one stalled connection from hanging the whole run;
        # raise_for_status stops error pages from being parsed as stats pages.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()

        # Parse the HTML code using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract the relevant information from the HTML code
        dmg_split = np.array(
            [_damage_percentage(soup, kind, champion) for kind in ('physical', 'magic', 'true')],
            dtype=float,
        )

        embedding = list(stats[0] * dmg_split) + [stats[1]]
        champ_embeddings[champion] = [round(float(valor), 2) for valor in embedding]

    return champ_embeddings


def load_matches():
    """Load the match CSV and derive per-champion averages and match rosters.

    Reads '2024_LoL_esports_match_data_from_OraclesElixir.csv', keeps the
    champion / damageshare / damagemitigatedperminute / result / league
    columns and drops every row containing a missing value.

    Side effect: one entry per match is appended to the module-level
    ``X_data`` and ``Y_data`` lists, so calling this function twice
    accumulates duplicates in them.

    Returns:
        tuple ``(data_final, X_data, Y_data, champ_list)`` where
        ``data_final`` maps each champion to its two averaged statistics,
        ``X_data`` holds ``[blue_team, red_team]`` champion-name lists,
        ``Y_data`` holds the ``result`` value of the first row of each match
        and ``champ_list`` is the sorted list of distinct champions.
    """
    wanted_columns = ['champion', 'damageshare', 'damagemitigatedperminute', 'result', 'league']
    raw = pd.read_csv('2024_LoL_esports_match_data_from_OraclesElixir.csv')
    filtered_df = raw[wanted_columns].dropna()
    # filtered_df = filtered_df[filtered_df['league'] == 'LCK'] # League filter

    data = filtered_df.to_numpy()

    # Collect the (damageshare, damagemitigatedperminute) pair of every row
    # under its champion, then average them.
    per_champion = {}
    for row in data:
        per_champion.setdefault(row[0], []).append(row[1:3])
    data_final = calcular_media(per_champion)

    # NOTE(review): assumes that after dropna() every match contributes
    # exactly 10 consecutive rows (5 blue followed by 5 red) - confirm
    # against the CSV layout. Trailing rows that do not fill a group of 10
    # are ignored.
    n_matches = len(data) // 10
    for start in range(0, n_matches * 10, 10):
        match_rows = data[start:start + 10]
        blue_team = [row[0] for row in match_rows[:5]]
        red_team = [row[0] for row in match_rows[5:]]

        X_data.append([blue_team, red_team])
        # The label is the 'result' column of the first row of the group.
        Y_data.append(match_rows[0][3])

    champ_list = sorted({row[0] for row in data})

    return data_final, X_data, Y_data, champ_list


def match2embeddings(matches, champ_embeddings):
    """Replace every champion name in *matches* with its embedding.

    Args:
        matches: iterable of matches, each an iterable of teams, each an
            iterable of champion names.
        champ_embeddings: mapping from champion name to embedding.

    Returns:
        A list with the same match / team / champion nesting, holding the
        looked-up embeddings. An unknown champion raises ``KeyError``.
    """
    return [
        [[champ_embeddings[name] for name in roster] for roster in match]
        for match in matches
    ]


def data_transformation(X_data, Y_data):
    """Turn raw matches into train / validation DataLoaders.

    Champion names are replaced by their embeddings (module-level
    ``champ_embeddings``), then combined into one feature vector per match
    according to the module-level ``transform`` setting:

    * ``'team_sum_diff'``: sum the embeddings of each team, then subtract
      the second team's sum from the first team's.
    * ``'flatten'``: concatenate the flattened embeddings of both teams.
    * ``'team_diff_flatten'``: flattened first team minus flattened second
      team.

    Args:
        X_data: matches as ``[team1, team2]`` lists of champion names.
        Y_data: one numeric label per match.

    Returns:
        tuple ``(train_loader, validation_loader)`` built from an 80/20
        split (``random_state=0``), with batch sizes 50 and 200.

    Raises:
        ValueError: if ``transform`` is not one of the three values above.
    """
    # Apply the created embeddings to our data
    X_emb = match2embeddings(X_data, champ_embeddings)

    # Transform data in different ways to check which way is better
    if transform == 'team_sum_diff':
        team_sum = [
            [[sum(column) for column in zip(*Team1)], [sum(column) for column in zip(*Team2)]]
            for (Team1, Team2) in X_emb
        ]
        features = [[a - b for a, b in zip(Team1, Team2)] for (Team1, Team2) in team_sum]
    elif transform == 'flatten':
        # np.array's second positional argument is dtype, so the two teams
        # have to be joined explicitly rather than passed as two arguments.
        features = np.array([
            np.concatenate([np.array(Team1).flatten(), np.array(Team2).flatten()])
            for (Team1, Team2) in X_emb
        ])
    elif transform == 'team_diff_flatten':
        features = np.array([
            np.array(Team1).flatten() - np.array(Team2).flatten()
            for (Team1, Team2) in X_emb
        ])
    else:
        # Fail here with a clear message instead of later inside
        # torch.tensor() when it is handed champion names.
        raise ValueError(
            f"Unknown transform {transform!r}; expected 'team_sum_diff', "
            "'flatten' or 'team_diff_flatten'"
        )

    # Transform our data into tensors
    Y_tensor = torch.tensor(Y_data, dtype=torch.float32)
    X_tensor = torch.tensor(features, dtype=torch.float32)

    # Separate the data between test and train
    X_train, X_test, Y_train, Y_test = train_test_split(X_tensor, Y_tensor, test_size=0.2, random_state=0)

    # Transform the data to datasets
    train_dataset = TensorDataset(X_train, Y_train)
    test_dataset = TensorDataset(X_test, Y_test)

    # Define DataLoaders
    train_loader = DataLoader(dataset=train_dataset, batch_size=50, shuffle=True)
    validation_loader = DataLoader(dataset=test_dataset, batch_size=200, shuffle=False)

    return train_loader, validation_loader

def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    """Train *model* and record loss / accuracy statistics.

    Every epoch runs one optimisation pass over ``train_loader`` followed by
    an evaluation pass over ``validation_loader``. Evaluation is performed in
    eval mode with gradient tracking disabled; the mode the model had on
    entry is restored for each training pass and before returning.

    Args:
        model: module mapping a batch ``x`` to one output column per sample.
        criterion: loss called as ``criterion(model(x), y.unsqueeze(1))``.
        train_loader: iterable of ``(x, y)`` training batches.
        validation_loader: iterable of ``(x, y)`` validation batches.
        optimizer: optimizer already bound to the model's parameters.
        epochs: number of passes over the training data.

    Returns:
        dict with ``'training_loss'`` (one value per training batch),
        ``'validation_loss'`` (one value per validation batch) and
        ``'validation_accuracy'`` (one value per epoch).

    Raises:
        ZeroDivisionError: if ``validation_loader`` yields no samples.
    """
    useful_stuff = {'training_loss': [], 'validation_loss': [], 'validation_accuracy': []}
    was_training = model.training

    for _ in range(epochs):
        # Undo the eval() of the previous epoch's validation pass.
        model.train(was_training)
        for x, y in train_loader:
            optimizer.zero_grad()
            z = model(x)
            loss = criterion(z, y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            useful_stuff['training_loss'].append(loss.item())

        correct = 0
        total = 0
        # eval() puts layers such as dropout / batch norm in inference mode;
        # no_grad() avoids building autograd graphs that are never used.
        model.eval()
        with torch.no_grad():
            for x, y in validation_loader:
                z = model(x)
                loss = criterion(z, y.unsqueeze(1))
                # Compare the raw output with 0 instead of applying a sigmoid.
                predicted = (z > 0).float()
                total += y.size(0)
                correct += (predicted.squeeze(1) == y).sum().item()
                useful_stuff['validation_loss'].append(loss.item())

        accuracy = correct / total
        useful_stuff['validation_accuracy'].append(accuracy)

    model.train(was_training)
    return useful_stuff


def analyze_data(X_data):
    """Convert raw matches into one feature vector per match.

    Champion names are replaced by their embeddings (module-level
    ``champ_embeddings``), then combined according to the module-level
    ``transform`` setting:

    * ``'team_sum_diff'``: sum the embeddings of each team, then subtract
      the second team's sum from the first team's (returns a list of lists).
    * ``'flatten'``: concatenate the flattened embeddings of both teams
      (returns a NumPy array).
    * ``'team_diff_flatten'``: flattened first team minus flattened second
      team (returns a NumPy array).

    Args:
        X_data: matches as ``[team1, team2]`` lists of champion names.

    Returns:
        The per-match feature vectors.

    Raises:
        ValueError: if ``transform`` is not one of the three values above.
    """
    # Apply the created embeddings to our data
    X_emb = match2embeddings(X_data, champ_embeddings)

    # Transform data in different ways to check which way is better
    if transform == 'team_sum_diff':
        team_sum = [
            [[sum(column) for column in zip(*Team1)], [sum(column) for column in zip(*Team2)]]
            for (Team1, Team2) in X_emb
        ]
        return [[a - b for a, b in zip(Team1, Team2)] for (Team1, Team2) in team_sum]

    if transform == 'flatten':
        # np.array's second positional argument is dtype, so the two teams
        # have to be joined explicitly rather than passed as two arguments.
        return np.array([
            np.concatenate([np.array(Team1).flatten(), np.array(Team2).flatten()])
            for (Team1, Team2) in X_emb
        ])

    if transform == 'team_diff_flatten':
        return np.array([
            np.array(Team1).flatten() - np.array(Team2).flatten()
            for (Team1, Team2) in X_emb
        ])

    # Returning the untouched champion names would only postpone the failure
    # to whoever consumes the result as numeric features.
    raise ValueError(
        f"Unknown transform {transform!r}; expected 'team_sum_diff', "
        "'flatten' or 'team_diff_flatten'"
    )

    


class Net(nn.Module):
    """Fully connected network with three tanh hidden layers.

    The output layer is linear (no activation is applied to it).

    NOTE: three BatchNorm1d layers (``bn1``..``bn3``) are registered, one per
    hidden width, but ``forward`` never applies them. They still appear in
    ``state_dict()``.
    """

    def __init__(self, D_in, hidden_dim1, hidden_dim2, hiddem_dim3, D_out):
        """Create the four linear layers and the three batch-norm layers."""
        super().__init__()
        widths = [D_in, hidden_dim1, hidden_dim2, hiddem_dim3, D_out]

        # linear1..linear4 connect consecutive widths; they are created in
        # order and before the batch-norm layers.
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'linear{idx}', nn.Linear(fan_in, fan_out))

        # bn1..bn3 match the three hidden widths.
        for idx, width in enumerate(widths[1:4], start=1):
            setattr(self, f'bn{idx}', nn.BatchNorm1d(width))

    def forward(self, x):
        """Return the raw output of the last linear layer for batch *x*."""
        for hidden in (self.linear1, self.linear2, self.linear3):
            x = torch.tanh(hidden(x))
        return self.linear4(x)


if __name__ == '__main__':
    # Script entry point: load the matches, scrape the champion embeddings,
    # build the per-match features and show a pair plot coloured by result.

    # Load the data from the matches
    match_dict, X_data, Y_data, champ_list = load_matches()

    # Create the embeddings for the champions
    champ_embeddings = create_embeddings(match_dict)

    X_data = analyze_data(X_data)

    # NOTE(review): four column names fit the 'team_sum_diff' transform
    # (one value per embedding component); the other transforms presumably
    # yield wider rows - confirm before switching `transform`.
    df_matches = pd.DataFrame(X_data, columns=['Physical dmg', 'Magic dmg', 'True dmg', 'dmg Tanked'])

    df_matches['Win/Loss'] = Y_data

    # hue must name an existing column; the label column added above is
    # 'Win/Loss' (there is no 'target' column in df_matches).
    _ = sns.pairplot(df_matches, hue='Win/Loss')

    plt.show()