# Biblioteca

In [1]:
import pandas as pd

# Leitura de dados

In [19]:
# Load the raw CSV (path is relative to the notebook's working directory).
df = pd.read_csv('../data/raw/002_20_all_Q.csv')

In [20]:
class tratamento_nomes_colunas:
    """Transformer that strips the ``'statistics.'`` substring from column names.

    Exposes the ``fit`` / ``transform`` pair so it can be called like a
    pipeline step. It holds no state.
    """

    def fit(self, X, y=None):
        """No-op; there is nothing to learn. Returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``'statistics.'`` removed from every column name.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame whose column labels are strings.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            New frame with renamed columns; ``X`` itself is not modified.
        """
        # Operate on the argument, not on a notebook-level global, so the
        # result depends only on what the caller passes in.
        return X.rename(columns=lambda nome: nome.replace('statistics.', ''))



In [21]:
# Strip the 'statistics.' substring from the raw column names.
df_rename = tratamento_nomes_colunas().transform(df)

In [30]:
class descobre_vencedor:
    """Transformer that sums stats per (gameId, teamName, Q) and labels each row.

    The output has one row per game / team / quarter with the summed
    statistics and a ``'resultado'`` column holding ``'W'`` for the team
    picked as the game's winner and ``'L'`` otherwise.
    """

    # Columns selected from the input; 'Q' is used as a grouping key, the
    # others are summed.
    _INPUT_COLUMNS = [
        'fieldGoalsMade', 'fieldGoalsAttempted', 'threePointersMade',
        'threePointersAttempted', 'freeThrowsMade', 'freeThrowsAttempted',
        'reboundsOffensive', 'reboundsDefensive', 'assists', 'steals',
        'blocks', 'turnovers', 'foulsPersonal', 'points', 'Q',
    ]

    def fit(self, X, y=None):
        """No-op; there is nothing to learn. Returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Build the per-quarter feature table with a win/loss label.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``gameId``, ``teamName`` and every column listed in
            ``_INPUT_COLUMNS``.
        y : ignored

        Returns
        -------
        pandas.DataFrame
            Columns ``gameId``, ``teamName``, ``Q``, the summed statistics and
            ``resultado`` (``'W'`` / ``'L'``).
        """
        vencedores = self._vencedor_por_jogo(X)

        # Aggregate from the argument X (not a notebook global) so the output
        # depends only on what the caller passes in.
        df_input = (
            X[['gameId', 'teamName'] + self._INPUT_COLUMNS]
            .groupby(['gameId', 'teamName', 'Q'])
            .sum()
            .reset_index()
        )

        # Vectorised lookup of each row's game winner instead of one
        # DataFrame.query per row.
        venceu = df_input['teamName'] == df_input['gameId'].map(vencedores)
        df_input['resultado'] = venceu.map({True: 'W', False: 'L'})

        return df_input

    @staticmethod
    def _vencedor_por_jogo(X):
        """Return a Series indexed by gameId holding the winning teamName.

        NOTE(review): the winner is the team with the larger summed
        ``fieldGoalsMade``, not ``points`` -- confirm this is intended.
        """
        totais = (
            X[['teamName', 'gameId', 'fieldGoalsMade']]
            .groupby(by=['gameId', 'teamName'], as_index=False)['fieldGoalsMade']
            .sum()
        )
        # `totais` comes out ordered by (gameId, teamName). A stable sort on
        # the total followed by keep='last' selects the team with the highest
        # total and, on a tie, the team that sorts last by name. That matches
        # the previous two-team comparison without positional iloc access,
        # so a game with a single team no longer raises.
        ordenado = totais.sort_values('fieldGoalsMade', kind='mergesort')
        melhores = ordenado.drop_duplicates(subset='gameId', keep='last')
        return melhores.set_index('gameId')['teamName']

In [31]:
# Build the per (gameId, teamName, Q) table with the 'resultado' W/L label.
df_input = descobre_vencedor().transform(df_rename)

In [33]:
# Persist the processed table; index=False keeps the row index out of the CSV.
df_input.to_csv('../data/processed/df_input.csv', index=False)

# Modelo utilizando apenas o nome dos times ?

In [26]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [27]:
# Features: every column except the two identifiers and the label.
X = df_input.drop(['gameId', 'teamName', 'resultado'], axis=1)
# Target: the 'W' / 'L' label of each row.
y = df_input['resultado']

In [28]:
# Hold out 33% of the rows for testing, with a fixed seed for repeatability.
# NOTE(review): the split is row-wise, so rows from the same gameId can land in
# both train and test -- consider splitting by game if that matters.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [29]:
# Fit a 5-fold cross-validated logistic regression on the training split,
# then display its score on the held-out test split.
clf = LogisticRegressionCV(cv=5, random_state=0, dual = False, max_iter=1000).fit(X_train, y_train)

clf.score(X_test, y_test)

0.6394745938472174

In [30]:
# Fit a random forest (default hyperparameters, fixed seed) on the training
# split, then display its score on the held-out test split.
rfc = RandomForestClassifier(random_state=0).fit(X_train, y_train)

rfc.score(X_test, y_test)

0.6156239198064293

In [31]:
X.columns

Index(['Q', 'fieldGoalsMade', 'fieldGoalsAttempted', 'threePointersMade',
       'threePointersAttempted', 'freeThrowsMade', 'freeThrowsAttempted',
       'reboundsOffensive', 'reboundsDefensive', 'assists', 'steals', 'blocks',
       'turnovers', 'foulsPersonal', 'points'],
      dtype='object')

In [32]:
y.name

'resultado'

In [33]:
rfc.score(X_train, y_train)

1.0

In [34]:
rfc.score(X_test, y_test)

0.6156239198064293

In [35]:
# Score on the full data set. X contains the rows rfc was trained on, so this
# number mixes training and test rows and is not a generalization estimate.
rfc.score(X,
          y)

0.8731172980374259

In [36]:
# Score restricted to rows with Q == 1. These rows come from df_input, which
# includes the rows rfc was trained on.
rfc.score(df_input.drop(['gameId', 'teamName', 'resultado'], axis=1).query('Q == 1'),
          df_input.query('Q == 1')['resultado'])

0.8675925925925926

In [37]:
# Score restricted to rows with Q == 2. These rows come from df_input, which
# includes the rows rfc was trained on.
rfc.score(df_input.drop(['gameId', 'teamName', 'resultado'], axis=1).query('Q == 2'),
          df_input.query('Q == 2')['resultado'])

0.8800925925925925

In [38]:
# Score restricted to rows with Q == 3. These rows come from df_input, which
# includes the rows rfc was trained on.
rfc.score(df_input.drop(['gameId', 'teamName', 'resultado'], axis=1).query('Q == 3'),
          df_input.query('Q == 3')['resultado'])

0.8861111111111111

In [39]:
# Score restricted to rows with Q == 4. These rows come from df_input, which
# includes the rows rfc was trained on.
rfc.score(df_input.drop(['gameId', 'teamName', 'resultado'], axis=1).query('Q == 4'),
          df_input.query('Q == 4')['resultado'])

0.8606481481481482

In [40]:
# Capture the constructor parameters of the fitted estimator so they can be
# passed to a new instance; the bare name on the last line displays the dict.
hip_param = clf.get_params()
hip_param

{'Cs': 10,
 'class_weight': None,
 'cv': 5,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1.0,
 'l1_ratios': None,
 'max_iter': 1000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 0,
 'refit': True,
 'scoring': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0}

In [41]:
# Refit with the same constructor parameters on the full data set (train + test rows).
model = LogisticRegressionCV(**hip_param).fit(X, y)

In [42]:
import pickle

In [43]:
filename = 'finalized_model.sav'
# Forward slashes match the other relative paths in this notebook and avoid
# the invalid '\m' escape sequence that the backslash path contained.
# The context manager closes (and therefore flushes) the file once the dump
# is done.
with open('../models/' + filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [44]:
X.columns

Index(['Q', 'fieldGoalsMade', 'fieldGoalsAttempted', 'threePointersMade',
       'threePointersAttempted', 'freeThrowsMade', 'freeThrowsAttempted',
       'reboundsOffensive', 'reboundsDefensive', 'assists', 'steals', 'blocks',
       'turnovers', 'foulsPersonal', 'points'],
      dtype='object')

In [45]:
len(['Q', 'fieldGoalsMade', 'fieldGoalsAttempted', 'threePointersMade',
'threePointersAttempted', 'freeThrowsMade', 'freeThrowsAttempted',
'reboundsOffensive', 'reboundsDefensive', 'assists', 'steals', 'blocks',
'turnovers', 'foulsPersonal', 'points'])

15

In [46]:
# Q = int(input("1 - Está em qual quarto do Jogo ? (1, 2, 3, 4)"))
# fieldGoalsMade = int(input("2 - Quantos pontos seu time fez ? (1, 2, 3, 4)"))
# fieldGoalsAttempted = int(input("3 - Quantas tentativas de arremesso ? (1, 2, 3, 4)"))
# threePointersMade = int(input("4 - Quantos pontos de 3 ? (1, 2, 3, 4)"))
# threePointersAttempted = int(input("5 - Quantas tentativas de 3 ? (1, 2, 3, 4)"))
# freeThrowsMade = int(input("6 - Quantos arremessos livres ? (1, 2, 3, 4)"))
# freeThrowsAttempted = int(input("7 - Quantos tentativas de arremessos livre ? (1, 2, 3, 4)"))
# reboundsOffensive = int(input("8 - Quantos rebotes ofensivos ? (1, 2, 3, 4)"))
# reboundsDefensive = int(input("9 - Quantos rebotes defencivos ? (1, 2, 3, 4)"))
# assists = int(input("10 - Total de assistencias  ? (1, 2, 3, 4)"))
# steals = int(input("11 - Total de roubos de bola ? (1, 2, 3, 4)"))
# blocks = int(input("12 - Total de bloqueios ? (1, 2, 3, 4)"))
# turnovers = int(input("13 - Total de perda de bolas ? (1, 2, 3, 4)"))
# foulsPersonal = int(input("14 - Quantidade de faltas ? (1, 2, 3, 4)"))
# points = int(input("15 - Quantidade de pontos ? (1, 2, 3, 4)"))

In [47]:
# dict_entrada = {'Q' : "1 - Está em qual quarto do Jogo ? (1, 2, 3, 4)",
# 'fieldGoalsMade' : "2 - Quantos pontos seu time fez ?",
# 'fieldGoalsAttempted' : "3 - Quantas tentativas de arremesso ?",
# 'threePointersMade' : "4 - Quantos pontos de 3 ?",
# 'threePointersAttempted' : "5 - Quantas tentativas de 3 ?",
# 'freeThrowsMade' : "6 - Quantos arremessos livres ?",
# 'freeThrowsAttempted' : "7 - Quantos tentativas de arremessos livre ?",
# 'reboundsOffensive' : "8 - Quantos rebotes ofensivos ?",
# 'reboundsDefensive' : "9 - Quantos rebotes defencivos ?",
# 'assists' : "10 - Total de assistencias  ?",
# 'steals' : "11 - Total de roubos de bola ?",
# 'blocks' : "12 - Total de bloqueios ?",
# 'turnovers' : "13 - Total de perda de bolas ?",
# 'foulsPersonal' : "14 - Quantidade de faltas ?",
# 'points' : "15 - Quantidade de pontos ?"}
# dict_dados = {}

In [48]:
# for chave, valor in dict_entrada.items():
#     dict_dados[chave] = [int(input(valor))]
#     print(valor)
#     print(dict_dados[chave])
# df_entrada = pd.DataFrame(data=dict_dados)

# if rfc.predict(df_entrada)[0] == 'L':
#     print("Setu time vai perder")
# else:
#     print("Seu time vai vencer")