## **Análisis estadístico**

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [2]:
# Render floats in pandas output with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

----

**Cargar conjunto de datos**

In [3]:
# Read the processed trade CSV (relative path) into the frame that the
# following cells aggregate.
df_2012_2023 = pd.read_csv(
    filepath_or_buffer='../data/processed/mex_trade_2012_2023_clean.csv',
)
# Preview the first five rows to confirm the columns were read as expected.
df_2012_2023.head(n=5)

Unnamed: 0,prod_est,coverage,type,year,month,concept,value_usd,status
0,Trade Balance of Goods of Mexico,National,Exports,2012,1,Total Exports,27281.58,Final Figures
1,Trade Balance of Goods of Mexico,National,Exports,2012,1,Petroleum,4628.3,Final Figures
2,Trade Balance of Goods of Mexico,National,Exports,2012,1,Crude Oil,4008.11,Final Figures
3,Trade Balance of Goods of Mexico,National,Exports,2012,1,Other Oil,620.18,Final Figures
4,Trade Balance of Goods of Mexico,National,Exports,2012,1,Non Petroleum,22653.28,Final Figures


---

**Calcular la *media*, la *mediana* y la *desviación estándar* de *value_usd* por *año*, *tipo de operación* y *concepto***

In [None]:
# Summarise value_usd within every (year, type, concept) group: mean, median
# and standard deviation. reset_index() turns the group keys back into columns.
estadisticas_agrupadas = (
    df_2012_2023
    .groupby(['year', 'type', 'concept'])['value_usd']
    .agg(mean_value='mean', median_value='median', std_value='std')
    .reset_index()
)

print("Media, Mediana y Desviación Estándar de value_usd por Año, Tipo y Concepto:")
estadisticas_agrupadas

Media, Mediana y Desviación Estándar de value_usd por Año, Tipo y Concepto:


Unnamed: 0,year,type,concept,mean_value,median_value,std_value
0,2012,Exports,Agrarian,909.52,926.98,214.57
1,2012,Exports,Crude Oil,3904.36,3970.42,367.57
2,2012,Exports,Extractive,408.87,417.26,75.10
3,2012,Exports,Manufacturing,25166.14,25374.98,1744.81
4,2012,Exports,Non Petroleum,26484.53,26548.44,1739.51
...,...,...,...,...,...,...
211,2023,Imports,Petroleum,4557.91,4572.92,565.72
212,2023,Imports,Total Imports,50384.67,51450.77,2685.91
213,2023,Imports,Total Imports CIF (Total Imports + Freight and...,52323.05,53417.50,2767.90
214,2023,Not applicable,Total Trade Balance Exports Total - Imports Total,-1033.60,-1129.14,1429.27


---

**Intervalos de confianza (95 %) para la media anual de *value_usd* por *tipo de operación* y *concepto***

In [None]:
# Two-sided Student-t confidence interval for the mean of value_usd within
# each (year, type, concept) group.
# NOTE(review): the t interval treats the observations of a group as
# independent draws; confirm that is acceptable for these trade figures.
confidence_level = 0.95
alpha = 1 - confidence_level

# Per-group mean, standard deviation and number of non-null observations.
estadisticas = (
    df_2012_2023
    .groupby(['year', 'type', 'concept'])['value_usd']
    .agg(mean_value='mean', std_value='std', count_value='count')
    .reset_index()
)

# An interval needs at least two observations. Smaller groups get NaN in every
# derived column rather than a zero division or non-positive degrees of freedom.
enough_obs = estadisticas['count_value'] > 1

# Standard error of the mean, s / sqrt(n), computed column-wise instead of
# with a row-by-row apply.
estadisticas['sem'] = (
    estadisticas['std_value'] / np.sqrt(estadisticas['count_value'])
).where(enough_obs)

# Critical t value with n - 1 degrees of freedom; stats.t.ppf evaluates the
# whole array at once and propagates NaN for the masked groups.
deg_freedom = (estadisticas['count_value'] - 1).where(enough_obs).to_numpy()
estadisticas['t_score'] = stats.t.ppf(1 - alpha / 2, deg_freedom)

estadisticas['margin_of_error'] = estadisticas['t_score'] * estadisticas['sem']

# The bound column names carry a fixed "95" label; rename them if
# confidence_level is ever changed.
estadisticas['lower_bound_95_ci'] = estadisticas['mean_value'] - estadisticas['margin_of_error']
estadisticas['upper_bound_95_ci'] = estadisticas['mean_value'] + estadisticas['margin_of_error']

# count_value stays in the report so the reader can see how many observations
# back each interval.
resultados_ci = estadisticas[
    ['year', 'type', 'concept', 'mean_value', 'lower_bound_95_ci', 'upper_bound_95_ci', 'count_value']
].round(2)

print("Intervalos de Confianza (95%) para la Media Anual de value_usd:")
resultados_ci

Intervalos de Confianza (95%) para la Media Anual de value_usd:


Unnamed: 0,year,type,concept,mean_value,lower_bound_95_ci,upper_bound_95_ci,count_value
0,2012,Exports,Agrarian,909.52,773.19,1045.85,12
1,2012,Exports,Crude Oil,3904.36,3670.82,4137.91,12
2,2012,Exports,Extractive,408.87,361.15,456.59,12
3,2012,Exports,Manufacturing,25166.14,24057.54,26274.74,12
4,2012,Exports,Non Petroleum,26484.53,25379.30,27589.76,12
...,...,...,...,...,...,...,...
211,2023,Imports,Petroleum,4557.91,4153.22,4962.60,10
212,2023,Imports,Total Imports,50384.67,48463.28,52306.05,10
213,2023,Imports,Total Imports CIF (Total Imports + Freight and...,52323.05,50343.01,54303.08,10
214,2023,Not applicable,Total Trade Balance Exports Total - Imports Total,-1033.60,-2056.04,-11.16,10
