In [2]:
import pandas as pd
import glob
import os


path = '/Users/moneysniper/Documents/NBA_analysis_project/gamelogs'
files = glob.glob(os.path.join(path, 'regular_season_*.csv'))

df_list = []
columns_to_load = [
    'diff_orb', 'diff_tov', 'diff_3p%', 'diff_2p%', 'diff_ft%', 'diff_fta', 'diff_3pa'
    ]
for file in files:
    year = os.path.basename(file).split('_')[-1].split('.')[0] 
    df = pd.read_csv(file)
    df = df[df['Unnamed: 0'] % 2 != 0]
    df = df[columns_to_load]
    df['year'] = int(year)
    df_list.append(df)

df = pd.concat(df_list, ignore_index=True)

overall_df = pd.read_csv('/Users/moneysniper/Documents/NBA_analysis_project/gamelogs/regular_season.csv')
overall_df = overall_df[overall_df['Unnamed: 0'] % 2 != 0]
overall_df = overall_df[columns_to_load]
overall_df['year'] = 'all_year'

df = pd.concat([overall_df, df], ignore_index=True)

grouped_summary_stats = df.groupby('year').agg(['mean', 'std']).round(5)
grouped_summary_stats = grouped_summary_stats.transpose()
grouped_summary_stats.to_csv('description_table.csv')

In [11]:
content_mapping = {
    'diff_orb': 'difference of offensive rebounds',
    'diff_tov': 'difference of turnovers',
    'diff_3p%': 'difference of 3 point shooting percentage',
    'diff_2p%': 'difference of 2 point shooting percentage',
    'diff_ft%': 'difference of free throw percentage',
    'diff_fta': 'difference of free throw attempts',
    'diff_3pa': 'difference of 3 point attempts'
}

# Replace the content in the DataFrame
grouped_summary_stats_replaced = grouped_summary_stats.replace(content_mapping, regex=True)

grouped_summary_stats_replaced = grouped_summary_stats_replaced.replace('_', ' ', regex=True)
grouped_summary_stats_replaced.columns = grouped_summary_stats_replaced.columns.str.replace('_', ' ')

latex_output = grouped_summary_stats.to_latex(index=True, float_format="{:.1f}".format)

# Print the LaTeX output
print(latex_output)

\begin{tabular}{llrrrrrrrrrr}
\toprule
 & year & 2016 & 2017 & 2018 & 2019 & 2020 & 2021 & 2022 & 2023 & 2024 & all_year \\
\midrule
\multirow[t]{2}{*}{diff_orb} & mean & 0.2 & 0.1 & 0.2 & 0.3 & 0.4 & 0.1 & 0.1 & 0.0 & 0.2 & 0.2 \\
 & std & 5.6 & 5.4 & 5.2 & 5.4 & 5.2 & 5.1 & 5.5 & 5.6 & 5.5 & 5.4 \\
\cline{1-12}
\multirow[t]{2}{*}{diff_tov} & mean & -0.4 & -0.2 & -0.1 & -0.1 & 0.1 & 0.3 & -0.3 & -0.2 & -0.0 & -0.1 \\
 & std & 5.0 & 4.9 & 4.9 & 5.1 & 5.1 & 5.1 & 4.8 & 5.2 & 5.0 & 5.0 \\
\cline{1-12}
\multirow[t]{2}{*}{diff_3p%} & mean & 1.0 & 2.0 & -0.1 & 0.8 & 0.7 & 0.6 & 0.4 & 1.3 & 0.7 & 0.8 \\
 & std & 14.3 & 13.6 & 12.9 & 12.3 & 12.0 & 12.5 & 12.0 & 11.9 & 11.7 & 12.6 \\
\cline{1-12}
\multirow[t]{2}{*}{diff_2p%} & mean & 1.0 & 0.9 & 1.6 & 1.3 & 0.9 & 0.3 & 1.1 & 0.8 & 0.8 & 1.0 \\
 & std & 9.3 & 9.5 & 10.0 & 10.0 & 10.3 & 10.0 & 10.5 & 10.2 & 10.8 & 10.1 \\
\cline{1-12}
\multirow[t]{2}{*}{diff_ft%} & mean & 0.4 & 0.1 & 0.0 & -0.1 & -0.2 & 0.2 & -0.1 & 0.2 & 1.8 & 0.3 \\
 & std & 1