In [2]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [3]:
# Location of the sample wildfire/weather data; note the file is a CSV export.
# NOTE(review): this is an absolute, machine-specific path -- adjust it when
# running the notebook elsewhere.
file_path = "C://Users//LENOVO//Downloads//STATIN_DATA//sample_wildfire_weather_data.csv"

# Load the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows as the cell output.
df.head(5)

Unnamed: 0,Region,Estimated_fire_area,Temperature_Mean,WindSpeed_Mean
0,WA,639.982,23.390953,3.551176
1,WA,593.569167,23.55262,4.0917
2,WA,37.345455,14.617797,4.637551
3,WA,544.986486,25.227217,4.334514
4,WA,35.8236,19.427573,4.840414


# Sample Temperature

In [12]:
# Keep only the grouping column and the temperature column used below.
temp_columns = ['Region', 'Temperature_Mean']
temp_df = df[temp_columns]
print(temp_df)

    Region  Temperature_Mean
0       WA         23.390953
1       WA         23.552620
2       WA         14.617797
3       WA         25.227217
4       WA         19.427573
..     ...               ...
387     TA          8.267706
388     TA         19.759433
389     TA         10.675490
390     TA          9.040038
391     TA         17.762332

[392 rows x 2 columns]


In [14]:
# Spread the temperature readings into one column per region. Every
# observation stays on its own row, so the cells belonging to the other
# regions are NaN.
#
# `index` is intentionally not passed: leaving it out keeps the frame's
# existing row index. An explicit `index=None` only means that on older pandas
# releases; newer releases interpret a passed None as a column label to look
# up, which fails for this frame.
temp_pivot = temp_df.pivot(columns='Region', values='Temperature_Mean')

# Renumber the rows 0..n-1 and discard the previous index.
temp_pivot_reset = temp_pivot.reset_index(drop=True)
temp_pivot_reset

Region,NSW,NT,QL,SA,TA,VI,WA
0,,,,,,,23.390953
1,,,,,,,23.552620
2,,,,,,,14.617797
3,,,,,,,25.227217
4,,,,,,,19.427573
...,...,...,...,...,...,...,...
387,,,,,8.267706,,
388,,,,,19.759433,,
389,,,,,10.675490,,
390,,,,,9.040038,,


In [17]:
# One-way layout: regress Temperature_Mean on the Region factor, then derive
# the type-1 ANOVA table from the fitted model.
temp_formula = 'Temperature_Mean ~ (Region)'
model = ols(temp_formula, data=temp_df).fit()
temp_anova_table = sm.stats.anova_lm(model, typ=1)
temp_anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Region,6.0,4921.443313,820.240552,28.580728,2.908602e-28
Residual,385.0,11049.145337,28.699079,,


In [38]:
from scipy.stats import f

# Manual one-way ANOVA summary for Temperature_Mean by Region, rebuilt from the
# sums of squares in `temp_anova_table`, followed by an F-test decision.

alfa = 0.05                            # significance level of the test
k = len(temp_df.Region.unique())       # number of distinct regions (groups)
N = len(temp_df)                       # total number of observations

# Degrees of freedom: between groups, within groups, and total.
v1 = k - 1
v2 = N - k
v_tot = v1 + v2

# Sums of squares taken from the statsmodels ANOVA table.
SSK = temp_anova_table['sum_sq']['Region']      # between-group (treatment)
SSE = temp_anova_table['sum_sq']['Residual']    # within-group (error)
SST = SSK + SSE

# Mean squares and the observed F statistic.
MSK = SSK / v1
MSE = SSE / v2
F_hitung = MSK / MSE

# Critical value: upper `alfa` quantile of F(v1, v2).
# (The original referenced an undefined name `alpha` here, which raises
# NameError on a fresh kernel; the level defined in this cell is `alfa`.)
F_table = f.ppf(1 - alfa, v1, v2)

temp_data = {
    'SUMBER VARIANSI': ['Perlakuan', 'Error', 'TOTAL'],
    'SS': [SSK, SSE, SST],
    'v': [v1, v2, v_tot],
    'MS': [MSK, MSE, None],            # no mean square for the total row
    'Fhitung': [F_hitung, None, None]  # F is reported on the treatment row only
}

# Assemble the summary table.
temp_anova_final = pd.DataFrame(temp_data)

# Report the table, the inputs of the test, and the decision.
print(temp_anova_final)
print("Diketahui: ")
print(f"alfa: {alfa}")
print(f"F hitung = {MSK}/{MSE} = {F_hitung}")
print(f"F tabel = F({alfa};{v1};{v2}) = {F_table}")
if F_hitung > F_table:
    print("Maka h0 ditolak, artinya ada minimal 1 region yang berbeda nilainya")
else:
    # Covers F_hitung == F_table as well, so a conclusion is always printed:
    # H0 is rejected only when the statistic strictly exceeds the critical value.
    print("Maka h0 diterima, artinya semua region sama nilainya")

  SUMBER VARIANSI            SS    v          MS    Fhitung
0       Perlakuan   4921.443313    6  820.240552  28.580728
1           Error  11049.145337  385   28.699079        NaN
2           TOTAL  15970.588650  391         NaN        NaN
Diketahui: 
alfa: 0.05
F hitung = 820.2405521582151/28.69907879658492 = 28.580727554775052
F tabel = F(0.05;6;385) = 2.1221371769660387
Maka h0 ditolak, artinya ada minimal 1 region yang berbeda nilainya


# Sample WindSpeed


In [39]:
# Keep only the grouping column and the wind-speed column used below.
windspd_columns = ['Region', 'WindSpeed_Mean']
windspd_df = df[windspd_columns]
print(windspd_df)

    Region  WindSpeed_Mean
0       WA        3.551176
1       WA        4.091700
2       WA        4.637551
3       WA        4.334514
4       WA        4.840414
..     ...             ...
387     TA        1.748738
388     TA        3.482381
389     TA        2.851847
390     TA        3.716907
391     TA        2.908519

[392 rows x 2 columns]


In [41]:
# Spread the wind-speed readings into one column per region. Every observation
# stays on its own row, so the cells belonging to the other regions are NaN.
#
# `index` is intentionally not passed: leaving it out keeps the frame's
# existing row index. An explicit `index=None` only means that on older pandas
# releases; newer releases interpret a passed None as a column label to look
# up, which fails for this frame.
windspd_pivot = windspd_df.pivot(columns='Region', values='WindSpeed_Mean')

# Renumber the rows 0..n-1 and discard the previous index.
windspd_pivot_reset = windspd_pivot.reset_index(drop=True)
windspd_pivot_reset

Region,NSW,NT,QL,SA,TA,VI,WA
0,,,,,,,3.551176
1,,,,,,,4.091700
2,,,,,,,4.637551
3,,,,,,,4.334514
4,,,,,,,4.840414
...,...,...,...,...,...,...,...
387,,,,,1.748738,,
388,,,,,3.482381,,
389,,,,,2.851847,,
390,,,,,3.716907,,


In [43]:
# One-way layout: regress WindSpeed_Mean on the Region factor, then derive the
# type-1 ANOVA table from the fitted model.
windspd_formula = 'WindSpeed_Mean ~ (Region)'
model = ols(windspd_formula, data=windspd_df).fit()
windspd_anova_table = sm.stats.anova_lm(model, typ=1)
windspd_anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
Region,6.0,45.362306,7.560384,9.941582,3.284427e-10
Residual,385.0,292.785184,0.760481,,


In [44]:
from scipy.stats import f

# Manual one-way ANOVA summary for WindSpeed_Mean by Region, rebuilt from the
# sums of squares in `windspd_anova_table`, followed by an F-test decision.

alfa = 0.05                              # significance level of the test
# Group and observation counts come from the wind-speed frame itself (the
# original copied them from the temperature frame).
k = len(windspd_df.Region.unique())      # number of distinct regions (groups)
N = len(windspd_df)                      # total number of observations

# Degrees of freedom: between groups, within groups, and total.
v1 = k - 1
v2 = N - k
v_tot = v1 + v2

# Sums of squares taken from the statsmodels ANOVA table.
SSK = windspd_anova_table['sum_sq']['Region']      # between-group (treatment)
SSE = windspd_anova_table['sum_sq']['Residual']    # within-group (error)
SST = SSK + SSE

# Mean squares and the observed F statistic.
MSK = SSK / v1
MSE = SSE / v2
F_hitung = MSK / MSE

# Critical value: upper `alfa` quantile of F(v1, v2).
# (The original referenced an undefined name `alpha` here, which raises
# NameError on a fresh kernel; the level defined in this cell is `alfa`.)
F_table = f.ppf(1 - alfa, v1, v2)

# Wind-speed-specific names so the temperature summary built earlier in the
# notebook is not overwritten by this cell.
windspd_data = {
    'SUMBER VARIANSI': ['Perlakuan', 'Error', 'TOTAL'],
    'SS': [SSK, SSE, SST],
    'v': [v1, v2, v_tot],
    'MS': [MSK, MSE, None],            # no mean square for the total row
    'Fhitung': [F_hitung, None, None]  # F is reported on the treatment row only
}

# Assemble the summary table.
windspd_anova_final = pd.DataFrame(windspd_data)

# Report the table, the inputs of the test, and the decision.
print(windspd_anova_final)
print("Diketahui: ")
print(f"alfa: {alfa}")
print(f"F hitung = {MSK}/{MSE} = {F_hitung}")
print(f"F tabel = F({alfa};{v1};{v2}) = {F_table}")
if F_hitung > F_table:
    print("Maka h0 ditolak, artinya ada minimal 1 region yang berbeda nilainya")
else:
    # Covers F_hitung == F_table as well, so a conclusion is always printed:
    # H0 is rejected only when the statistic strictly exceeds the critical value.
    print("Maka h0 diterima, artinya semua region sama nilainya")

  SUMBER VARIANSI          SS    v        MS   Fhitung
0       Perlakuan   45.362306    6  7.560384  9.941582
1           Error  292.785184  385  0.760481       NaN
2           TOTAL  338.147490  391       NaN       NaN
Diketahui: 
alfa: 0.05
F hitung = 7.560384306608366/0.7604809986298416 = 9.941582130559354
F tabel = F(0.05;6;385) = 2.1221371769660387
Maka h0 ditolak, artinya ada minimal 1 region yang berbeda nilainya
