## **Append das estatísticas gerais dos times ao longo de 2016 até 2019 com as estatísticas das partidas**

### **Agregação dos datasets dos times nos 4 anos em um único**

In [8]:
import os
import pandas as pd

# Root folder holding one sub-folder of per-team CSV files for each season
base_path = r'C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/volleyball-case-study/data/fivb-ncaa-2019/ncaa/team_stats'
output_path = os.path.join(base_path, 'aggregated_stats')
os.makedirs(output_path, exist_ok=True)  # Create the output folder if it does not exist yet

# Season sub-folders to aggregate
years = ['team_stats_2017', 'team_stats_2018', 'team_stats_2019','team_stats_2016']

# Per-team list of the DataFrames read from each season folder.
# Frames are collected first and concatenated once per team, instead of
# re-concatenating the growing frame on every file.
team_frames = {}

for year in years:
    year_path = os.path.join(base_path, year)

    for file_name in os.listdir(year_path):
        if not file_name.endswith('.csv'):
            continue

        # The team name is the file name without its extension. splitext only
        # strips the final '.csv', so names that contain dots are kept whole
        # (splitting on the first '.' would collapse e.g. 'St. X' and 'St. Y'
        # into the same 'St' key).
        team_name = os.path.splitext(file_name)[0]

        file_path = os.path.join(year_path, file_name)
        team_frames.setdefault(team_name, []).append(pd.read_csv(file_path))

# Dictionary mapping each team name to its rows from all seasons
team_data = {
    team_name: pd.concat(frames)
    for team_name, frames in team_frames.items()
}

# Write one aggregated CSV per team
for team_name, team_df in team_data.items():
    output_file = os.path.join(output_path, f'{team_name}_aggregated.csv')
    team_df.to_csv(output_file, index=False)

print(f"Aggregation complete. CSVs saved in {output_path}")

Aggregation complete. CSVs saved in C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/volleyball-case-study/data/fivb-ncaa-2019/ncaa/team_stats\aggregated_stats


### **Criação de um csv com a média dos somatórios das estatísticas dos times ao longo dos 4 anos**

In [2]:
import os
import pandas as pd

# Folder containing the per-team aggregated CSV files
aggregated_path = r'C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/volleyball-case-study/data/fivb-ncaa-2019/ncaa/team_stats/aggregated_stats'

# Suffix used when the aggregated files were written ('<team>_aggregated.csv')
aggregated_suffix = '_aggregated.csv'

# Columns removed before averaging
columns_to_drop = ['Jersey', 'Player', 'Yr', 'Pos', 'Ht', 'GP', 'GS']

# One record per team: the team name plus the mean of each numeric column
team_stats_summary = []

# sorted() makes the row order of the summary independent of the file system
for file_name in sorted(os.listdir(aggregated_path)):
    if not file_name.endswith('.csv'):
        continue

    # Recover the team name by stripping the known suffix. Splitting on the
    # first '_' would truncate any team name that itself contains an underscore.
    if file_name.endswith(aggregated_suffix):
        team_name = file_name[:-len(aggregated_suffix)]
    else:
        team_name = os.path.splitext(file_name)[0]

    df = pd.read_csv(os.path.join(aggregated_path, file_name))

    # Keep only the rows whose Player field is "Totals"
    # (presumably one per season — confirm against the raw files)
    totals_row = df[df['Player'] == 'Totals']
    if totals_row.empty:
        continue

    # errors='ignore' so a file lacking one of these columns does not abort the run
    totals_row = totals_row.drop(columns=columns_to_drop, errors='ignore')

    # Only numeric columns can be averaged
    totals_row_numeric = totals_row.select_dtypes(include=[int, float])

    # Column-wise mean over the "Totals" rows
    totals_mean = totals_row_numeric.mean(axis=0)

    team_stats_summary.append({
        'Team': team_name,
        **totals_mean.to_dict()  # one key per averaged column
    })

# Summary table: one row per team
team_summary_df = pd.DataFrame(team_stats_summary)


### **Retirando colunas nulas**

In [3]:
# Preview of the summary without these two columns. The result is only
# displayed; team_summary_df itself is not modified by this cell.
columns_to_drop = ['MP.1','Attend']
team_summary_df.drop(columns_to_drop, axis=1)

Unnamed: 0,Team,S,MP,MS,Kills,Errors,Total Attacks,Hit Pct,Assists,Aces,SErr,Digs,RErr,Block Solos,Block Assists,BErr,PTS,BHE,Trpl Dbl
0,A&M-Corpus Christi,113.250000,27.0,,1481.000000,575.25,4078.250000,0.221500,1383.750000,143.000000,220.750000,1792.750000,152.75,40.250000,266.25,33.50,1783.875000,35.000000,27.0
1,Abilene Christian,109.333333,23.0,,1290.333333,579.00,3911.333333,0.182667,1195.666667,122.666667,176.333333,1627.333333,152.00,31.333333,438.00,38.00,1614.166667,20.666667,23.0
2,Air Force,120.750000,26.0,,1476.250000,754.00,4098.500000,0.175250,1394.500000,166.250000,285.250000,1608.750000,149.25,47.000000,372.25,58.25,1840.375000,48.000000,26.0
3,Akron,103.500000,22.0,,1291.500000,577.00,3677.500000,0.194000,1192.000000,147.500000,221.750000,1493.500000,141.00,41.500000,267.00,32.25,1575.750000,26.000000,22.0
4,Alabama A&M,120.500000,28.0,,1332.750000,608.75,3737.000000,0.192750,1219.750000,160.500000,153.500000,1599.000000,216.75,82.500000,293.50,63.00,1689.750000,42.000000,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,Wright St,109.250000,8.0,,1371.500000,614.25,4168.500000,0.181000,1282.000000,136.000000,193.000000,1919.000000,127.75,38.750000,414.50,53.00,1699.250000,24.000000,8.0
332,Wyoming,115.750000,12.0,,1455.000000,555.75,3896.000000,0.229750,1337.250000,121.500000,206.250000,1589.500000,134.75,55.250000,497.75,49.50,1853.500000,27.500000,12.0
333,Xavier,113.500000,7.0,,1446.000000,608.25,4070.250000,0.204500,1353.000000,113.250000,176.000000,1711.000000,148.50,57.750000,397.25,57.00,1769.375000,28.750000,7.0
334,Yale,84.250000,9.0,,1116.500000,431.00,3026.000000,0.226750,1035.250000,119.750000,155.750000,1314.250000,77.50,47.500000,261.25,37.50,1414.375000,16.750000,9.0


In [4]:
# Number of missing values in each column of the summary
team_summary_df.isnull().sum(axis=0)

Team               0
S                  0
MP                 8
MS               336
Kills              0
Errors             0
Total Attacks      0
Hit Pct            0
Assists            0
Aces               0
SErr               0
Digs               0
RErr               0
Block Solos        0
Block Assists      0
BErr               0
PTS                0
BHE                0
Trpl Dbl           8
Attend           336
MP.1             336
dtype: int64

In [5]:
# Columns reported as null for every team by the isna() count
columns_to_drop = ['MS','Attend','MP.1']

# DataFrame.drop returns a new frame, so the result must be assigned back;
# otherwise the CSV written afterwards still contains these columns.
# errors='ignore' keeps the cell safe to re-run once the columns are gone.
team_summary_df = team_summary_df.drop(columns=columns_to_drop, errors='ignore')
team_summary_df

Unnamed: 0,Team,S,MP,Kills,Errors,Total Attacks,Hit Pct,Assists,Aces,SErr,Digs,RErr,Block Solos,Block Assists,BErr,PTS,BHE,Trpl Dbl
0,A&M-Corpus Christi,113.250000,27.0,1481.000000,575.25,4078.250000,0.221500,1383.750000,143.000000,220.750000,1792.750000,152.75,40.250000,266.25,33.50,1783.875000,35.000000,27.0
1,Abilene Christian,109.333333,23.0,1290.333333,579.00,3911.333333,0.182667,1195.666667,122.666667,176.333333,1627.333333,152.00,31.333333,438.00,38.00,1614.166667,20.666667,23.0
2,Air Force,120.750000,26.0,1476.250000,754.00,4098.500000,0.175250,1394.500000,166.250000,285.250000,1608.750000,149.25,47.000000,372.25,58.25,1840.375000,48.000000,26.0
3,Akron,103.500000,22.0,1291.500000,577.00,3677.500000,0.194000,1192.000000,147.500000,221.750000,1493.500000,141.00,41.500000,267.00,32.25,1575.750000,26.000000,22.0
4,Alabama A&M,120.500000,28.0,1332.750000,608.75,3737.000000,0.192750,1219.750000,160.500000,153.500000,1599.000000,216.75,82.500000,293.50,63.00,1689.750000,42.000000,28.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,Wright St,109.250000,8.0,1371.500000,614.25,4168.500000,0.181000,1282.000000,136.000000,193.000000,1919.000000,127.75,38.750000,414.50,53.00,1699.250000,24.000000,8.0
332,Wyoming,115.750000,12.0,1455.000000,555.75,3896.000000,0.229750,1337.250000,121.500000,206.250000,1589.500000,134.75,55.250000,497.75,49.50,1853.500000,27.500000,12.0
333,Xavier,113.500000,7.0,1446.000000,608.25,4070.250000,0.204500,1353.000000,113.250000,176.000000,1711.000000,148.50,57.750000,397.25,57.00,1769.375000,28.750000,7.0
334,Yale,84.250000,9.0,1116.500000,431.00,3026.000000,0.226750,1035.250000,119.750000,155.750000,1314.250000,77.50,47.500000,261.25,37.50,1414.375000,16.750000,9.0


### **Salvando o CSV**

In [6]:
# Destination file for the per-team summary table
output_csv_path = r'C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/volleyball-case-study/data/team_statistics_summary.csv'

# Write the summary to disk, leaving out the positional index
team_summary_df.to_csv(path_or_buf=output_csv_path, index=False)

## **Append das estatísticas gerais dos times com as partidas**

In [11]:
# Match table
df = pd.read_csv("C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/novo/volleyball-case-study/data/fivb-ncaa-2019/ncaa/combined/accumulated/team_v_team.csv")

# Per-team summary table. The key column is renamed while loading (no in-place
# mutation) so it matches the match table's 'TeamA' column.
# NOTE(review): this path is not the one the summary CSV is written to in the
# "Salvando o CSV" cell — confirm the file here is the up-to-date copy.
df_stats = pd.read_csv("C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/novo/volleyball-case-study/data/fivb-ncaa-2019/ncaa/team_statistics_summary.csv").rename(columns={"Team": "TeamA"})

# Left join keeps every row of the match table. validate='many_to_one' raises
# a MergeError if a team appears more than once in df_stats, which would
# otherwise silently duplicate match rows.
# NOTE(review): only TeamA receives statistics; if the match table also has an
# opposing-team column, a second merge would be needed — confirm.
df = pd.merge(df, df_stats, on='TeamA', how='left', validate='many_to_one')

# Save the merged table
output_csv_path = 'C:/Users/gabri/OneDrive/Documents/Faculdade/2024.2/Projeto de Ciência de Dados/novo/volleyball-case-study/data/fivb-ncaa-2019/ncaa/matches_teams_stats.csv'
df.to_csv(output_csv_path, index=False)

# Preview as the cell's last expression so the notebook actually renders it
df.head()