In [173]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [174]:
mainDF = pd.read_csv(r'C:\Users\raadr\OneDrive\Desktop\AflAnalysis-\data\raw\rawData12_24Complete.csv')

# Data Cleaning

In [175]:
mainDF['Attendance'] = mainDF['Attendance'].str.replace(',', '', regex=True)  # Remove commas
mainDF['Attendance'] = pd.to_numeric(mainDF['Attendance'], errors='coerce') 

## Data Encoding
Label Encoding is used for Venues and Teams since one-hot encoding will introduce a lot of empty tables and space. However, drawback is that ML model might misinterpret the venues and teams as having an order. Label encoding could assign large numbers to certain categories, making it harder for some algorithms to interpret the values

In [176]:
for column in mainDF.columns:
    print(column)
    print(type(mainDF[column][0]))

GameId
<class 'str'>
Year
<class 'numpy.int64'>
Round
<class 'numpy.int64'>
Date
<class 'str'>
MaxTemp
<class 'numpy.float64'>
MinTemp
<class 'numpy.float64'>
Rainfall
<class 'numpy.float64'>
Venue
<class 'str'>
StartTime
<class 'str'>
Attendance
<class 'numpy.int64'>
HomeTeam
<class 'str'>
HomeTeamScoreQT
<class 'numpy.float64'>
HomeTeamScoreHT
<class 'numpy.float64'>
HomeTeamScore3QT
<class 'numpy.float64'>
HomeTeamScoreFT
<class 'numpy.float64'>
HomeTeamScore
<class 'numpy.int64'>
AwayTeam
<class 'str'>
AwayTeamScoreQT
<class 'numpy.float64'>
AwayTeamScoreHT
<class 'numpy.float64'>
AwayTeamScore3QT
<class 'numpy.float64'>
AwayTeamScoreFT
<class 'numpy.float64'>
AwayTeamScore
<class 'numpy.int64'>
homePosition
<class 'numpy.float64'>
homePoints
<class 'numpy.float64'>
homePercentage
<class 'numpy.float64'>
awayPosition
<class 'numpy.float64'>
awayPoints
<class 'numpy.float64'>
awayPercentage
<class 'numpy.float64'>
Home Odds
<class 'numpy.float64'>
Away Odds
<class 'numpy.float64'>


In [177]:
print(mainDF['HomeTeam'].unique())
print(mainDF['Venue'].unique())

['Greater Western Sydney' 'Richmond' 'Hawthorn' 'Melbourne' 'Gold Coast'
 'Fremantle' 'North Melbourne' 'Western Bulldogs' 'Port Adelaide'
 'Brisbane Lions' 'Essendon' 'Sydney' 'West Coast' 'Adelaide'
 'Collingwood' 'St Kilda' 'Geelong' 'Carlton']
['Stadium Australia' 'M.C.G.' 'Carrara' 'Subiaco' 'Docklands'
 'Football Park' 'Gabba' 'S.C.G.' 'Bellerive Oval' 'Blacktown'
 'Kardinia Park' 'Manuka Oval' 'York Park' 'Marrara Oval'
 'Sydney Showground' "Cazaly's Stadium" 'Wellington' 'Adelaide Oval'
 'Traeger Park' 'Jiangwan Stadium' 'Eureka Stadium' 'Perth Stadium'
 'Riverway Stadium' 'Norwood Oval' 'Summit Sports Park']


In [178]:
team_name_mapping = {
    'Adelaide': 1,
    'Brisbane Lions': 2,
    'Carlton': 3,
    'Collingwood': 4,
    'Essendon': 5,
    'Fremantle': 6,
    'Geelong': 7,
    'Gold Coast': 8,
    'Greater Western Sydney': 9,
    'Hawthorn': 10,
    'Melbourne': 11,
    'North Melbourne': 12,
    'Port Adelaide': 13,
    'Richmond': 14,
    'St Kilda': 15,
    'Sydney': 16,
    'West Coast': 17,
    'Western Bulldogs': 18
}

mainDF['HomeTeam'] = mainDF['HomeTeam'].map(team_name_mapping)
mainDF['AwayTeam'] = mainDF['AwayTeam'].map(team_name_mapping)

In [179]:
# Ensure Date is in datetime format if not already.
mainDF['Date'] = pd.to_datetime(mainDF['Date'])

# Step 1: Sort the DataFrame by Date to ensure chronological order.
mainDF = mainDF.sort_values(by='Date', ascending=True)

# Step 2: Build a unified team-games DataFrame from the home and away appearances.
# Extract home team appearances:
df_home = mainDF[['Date', 'HomeTeam', 'homePosition', 'homePoints', 'homePercentage', 'HomeTeamScore', 'AwayTeamScore', 'Year', 'Round']].copy()
df_home = df_home.rename(columns={
    'HomeTeam': 'Team',
    'homePosition': 'position',
    'homePoints': 'points',
    'homePercentage': 'percentage',
    'HomeTeamScore': 'teamScore',
    'AwayTeamScore': 'opponentScore',
    'Year': 'year',
    'Round': 'round'
})

# Extract away team appearances:
df_away = mainDF[['Date', 'AwayTeam', 'awayPosition', 'awayPoints', 'awayPercentage', 'HomeTeamScore', 'AwayTeamScore', 'Year', 'Round']].copy()
df_away = df_away.rename(columns={
    'AwayTeam': 'Team',
    'awayPosition': 'position',
    'awayPoints': 'points',
    'awayPercentage': 'percentage',
    'AwayTeamScore': 'teamScore',
    'HomeTeamScore': 'opponentScore',
    'Year': 'year',
    'Round': 'round'
})

# Concatenate both so that every game for every team is in one DataFrame.
df_team = pd.concat([df_home, df_away], ignore_index=True)

# Sort by Team and Date so that each team’s games are in chronological order.
df_team = df_team.sort_values(by=['Team', 'Date'], ascending=True)

# Step 3: Create win_or_loss column based on the team score and opponent score.
# If the team score is higher than the opponent score, it's a win (1), else it's a loss (0).
df_team['win_or_loss'] = np.where(df_team['teamScore'] > df_team['opponentScore'], 1, 0)

# Step 4: Shift the win_or_loss to get the previous game status.
df_team['prev_win_or_loss'] = df_team.groupby('Team')['win_or_loss'].shift(1)

# Step 5: Identify the first game of each year and set 'prev_win_or_loss' to NaN for those.
# Reset win_or_loss for Round 1 of each year.
mask_new_year_or_round1 = (df_team['round'] == 1)
df_team.loc[mask_new_year_or_round1, 'prev_win_or_loss'] = np.nan

# Step 6: Merge the win/loss information back into the original DataFrame for both Home and Away teams.
# For home team win/loss info:
home_prev = df_team[['Date', 'Team', 'prev_win_or_loss']].copy()
home_prev = home_prev.rename(columns={
    'Team': 'HomeTeam',
    'prev_win_or_loss': 'previous_game_home_win_loss'
})

# Merge on Date and HomeTeam
mainDF = pd.merge(mainDF, home_prev, on=['Date', 'HomeTeam'], how='left')

# For away team win/loss info:
away_prev = df_team[['Date', 'Team', 'prev_win_or_loss']].copy()
away_prev = away_prev.rename(columns={
    'Team': 'AwayTeam',
    'prev_win_or_loss': 'previous_game_away_win_loss'
})

# Merge on Date and AwayTeam
mainDF = pd.merge(mainDF, away_prev, on=['Date', 'AwayTeam'], how='left')

# Display the final DataFrame
mainDF.iloc[15:25]


Unnamed: 0,GameId,Year,Round,Date,MaxTemp,MinTemp,Rainfall,Venue,StartTime,Attendance,...,homePosition,homePoints,homePercentage,awayPosition,awayPoints,awayPercentage,Home Odds,Away Odds,previous_game_home_win_loss,previous_game_away_win_loss
15,2012R0208,2012,2,2012-04-08,20.0,6.9,0.0,Docklands,4:40 PM,21078,...,7.0,4.0,164.7,17.0,0.0,41.7,1.05,9.56,0.0,0.0
16,2012R0207,2012,2,2012-04-08,18.4,8.9,0.4,Bellerive Oval,1:10 PM,11127,...,6.0,4.0,180.4,18.0,0.0,32.2,1.01,15.94,0.0,0.0
17,2012R0209,2012,2,2012-04-09,15.2,10.1,1.4,M.C.G.,3:10 PM,69231,...,10.0,4.0,99.0,8.0,4.0,109.7,2.48,1.53,0.0,1.0
18,2012R0301,2012,3,2012-04-13,24.8,8.0,0.4,M.C.G.,7:50 PM,84259,...,2.0,12.0,194.7,13.0,4.0,81.1,1.83,1.93,1.0,1.0
19,2012R0302,2012,3,2012-04-14,24.8,13.1,0.0,M.C.G.,1:45 PM,49826,...,10.0,4.0,97.9,17.0,0.0,50.2,1.21,4.39,0.0,0.0
20,2012R0303,2012,3,2012-04-14,27.9,10.8,0.0,Football Park,1:40 PM,19032,...,12.0,4.0,85.7,3.0,12.0,148.8,2.58,1.48,0.0,1.0
21,2012R0304,2012,3,2012-04-14,26.0,11.8,0.0,Subiaco,2:40 PM,33473,...,9.0,8.0,108.7,14.0,4.0,73.3,1.09,7.32,0.0,0.0
22,2012R0305,2012,3,2012-04-14,25.2,18.3,9.2,Carrara,7:40 PM,17069,...,16.0,0.0,53.3,4.0,12.0,115.9,9.66,1.05,0.0,1.0
23,2012R0306,2012,3,2012-04-14,24.8,13.1,0.0,Docklands,7:40 PM,28971,...,15.0,0.0,59.5,5.0,8.0,185.8,2.49,1.52,0.0,1.0
24,2012R0307,2012,3,2012-04-15,26.9,16.0,0.0,Blacktown,1:10 PM,6875,...,18.0,0.0,37.0,1.0,12.0,211.2,23.86,1.01,0.0,1.0


In [180]:
import numpy as np
import pandas as pd

# Ensure Date is in datetime format if not already.
mainDF['Date'] = pd.to_datetime(mainDF['Date'])

# Step 1: Sort the DataFrame by Date to ensure chronological order.
mainDF = mainDF.sort_values(by='Date', ascending=True)

# Step 2: Build a unified team-games DataFrame from the home and away appearances.
# Extract home team appearances:
df_home = mainDF[['Date', 'HomeTeam', 'homePosition', 'homePoints', 'homePercentage']].copy()
df_home = df_home.rename(columns={
    'HomeTeam': 'Team',
    'homePosition': 'position',
    'homePoints': 'points',
    'homePercentage': 'percentage'
})

# Extract away team appearances:
df_away = mainDF[['Date', 'AwayTeam', 'awayPosition', 'awayPoints', 'awayPercentage']].copy()
df_away = df_away.rename(columns={
    'AwayTeam': 'Team',
    'awayPosition': 'position',
    'awayPoints': 'points',
    'awayPercentage': 'percentage'
})

# Concatenate both so that every game for every team is in one DataFrame.
df_team = pd.concat([df_home, df_away], ignore_index=True)

# Sort by Team and Date so that each team’s games are in chronological order.
df_team = df_team.sort_values(by=['Team', 'Date'], ascending=True)

# Create a 'year' column for each game.
df_team['year'] = df_team['Date'].dt.year

# Step 3: For each team, shift the stats to create previous game columns.
df_team['prev_position'] = df_team.groupby('Team')['position'].shift(1)
df_team['prev_points'] = df_team.groupby('Team')['points'].shift(1)
df_team['prev_percentage'] = df_team.groupby('Team')['percentage'].shift(1)

# Also shift the year column to compare years.
df_team['prev_year'] = df_team.groupby('Team')['year'].shift(1)

# For games where the previous game is in a different year, set the previous game stats to NaN.
mask_new_year = df_team['year'] != df_team['prev_year']
df_team.loc[mask_new_year, ['prev_position', 'prev_points', 'prev_percentage']] = np.nan

# (Optional) Drop the helper columns if you don't need them.
df_team.drop(columns=['year', 'prev_year'], inplace=True)

# Step 4: Merge the previous game info back into the original DataFrame.
# For the home team info:
home_prev = df_team[['Date', 'Team', 'prev_position', 'prev_points', 'prev_percentage']].copy()
home_prev = home_prev.rename(columns={
    'Team': 'HomeTeam',
    'prev_position': 'previous_game_home_position',
    'prev_points': 'previous_game_home_points',
    'prev_percentage': 'previous_game_home_percentage'
})

# Merge on Date and HomeTeam
mainDF = pd.merge(mainDF, home_prev, on=['Date', 'HomeTeam'], how='left')

# For the away team info:
away_prev = df_team[['Date', 'Team', 'prev_position', 'prev_points', 'prev_percentage']].copy()
away_prev = away_prev.rename(columns={
    'Team': 'AwayTeam',
    'prev_position': 'previous_game_away_position',
    'prev_points': 'previous_game_away_points',
    'prev_percentage': 'previous_game_away_percentage'
})

# Merge on Date and AwayTeam
mainDF = pd.merge(mainDF, away_prev, on=['Date', 'AwayTeam'], how='left')
mainDF

Unnamed: 0,GameId,Year,Round,Date,MaxTemp,MinTemp,Rainfall,Venue,StartTime,Attendance,...,Home Odds,Away Odds,previous_game_home_win_loss,previous_game_away_win_loss,previous_game_home_position,previous_game_home_points,previous_game_home_percentage,previous_game_away_position,previous_game_away_points,previous_game_away_percentage
0,2012R0101,2012,1,2012-03-24,24.0,12.2,0.0,Stadium Australia,7:20 PM,38203,...,13.78,1.02,,,,,,,,
1,2012R0102,2012,1,2012-03-29,25.7,9.7,0.0,M.C.G.,7:45 PM,78285,...,2.48,1.52,,,,,,,,
2,2012R0103,2012,1,2012-03-30,27.4,9.7,0.0,M.C.G.,7:50 PM,78466,...,1.65,2.19,,,,,,,,
3,2012R0104,2012,1,2012-03-31,29.1,15.1,0.6,M.C.G.,1:45 PM,33473,...,1.42,2.81,,,,,,,,
4,2012R0105,2012,1,2012-03-31,28.2,19.7,0.0,Carrara,3:45 PM,12790,...,4.39,1.22,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2953,2023SF01,2023,-3,2023-09-15,24.5,14.4,0.0,M.C.G.,7:50 PM,96412,...,1.61,2.21,0.0,1.0,,,,,,
2954,2023SF02,2023,-3,2023-09-16,24.0,7.6,0.0,Adelaide Oval,7:10 PM,45520,...,1.62,2.18,0.0,1.0,,,,,,
2955,2023PF01,2023,-2,2023-09-22,15.3,5.7,0.0,M.C.G.,7:50 PM,97665,...,1.55,2.34,1.0,1.0,,,,,,
2956,2023PF02,2023,-2,2023-09-23,24.1,14.2,0.0,Gabba,5:15 PM,36012,...,1.38,3.05,1.0,1.0,,,,,,


In [181]:
# Removing stadiums which have low capacity or low usage
stadiums_to_remove = [
    'Bellerive Oval', 'Manuka Oval', 'Stadium Australia', 'Marrara Oval', 
    "Cazaly's Stadium", 'Eureka Stadium', 'Traeger Park', 'Wellington', 
    'Jiangwan Stadium', 'Norwood Oval', 'Blacktown', 'Riverway Stadium', 
    'Summit Sports Park'
]


mainDF = mainDF[~mainDF['Venue'].isin(stadiums_to_remove)]

venue_name_mapping = {
    'M.C.G.': 1, 'Carrara': 2, 'Subiaco': 3, 'Docklands': 4, 
    'Football Park': 5, 'Gabba': 6, 'S.C.G.': 7, 'Kardinia Park': 8, 
    'York Park': 9, 'Sydney Showground': 10, 'Adelaide Oval': 11, 
    'Perth Stadium': 12
}

mainDF['Venue'] = mainDF['Venue'].map(venue_name_mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['Venue'] = mainDF['Venue'].map(venue_name_mapping)


In [182]:
# Dealing with Datetime Features
mainDF['Date'] = pd.to_datetime(mainDF['Date'], errors='coerce')

#Create Day Variable
mainDF['Day'] = mainDF['Date'].dt.dayofweek  # Extract the day of the week (Monday=0, Sunday=6)

#Create Month Variable
mainDF['Month'] = mainDF['Date'].dt.month

# Assuming StartTime is in 12-hour clock format (e.g., '7:20 PM')
mainDF['StartTime'] = pd.to_datetime(mainDF['StartTime'], format='%I:%M %p', errors='coerce')

# If StartTime is in 24-hour clock format (e.g., '19:20')
# mainDF['StartTime'] = pd.to_datetime(mainDF['StartTime'], format='%H:%M', errors='coerce')

# Extract minutes since midnight
mainDF['MinutesSinceMidnight'] = mainDF['StartTime'].dt.hour * 60 + mainDF['StartTime'].dt.minute

#Holiday/Seasonal Indicators


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['Date'] = pd.to_datetime(mainDF['Date'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['Day'] = mainDF['Date'].dt.dayofweek  # Extract the day of the week (Monday=0, Sunday=6)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['Month'] = mainDF['Date'].dt.month
A 

In [183]:
mainDF.iloc[380:415]

Unnamed: 0,GameId,Year,Round,Date,MaxTemp,MinTemp,Rainfall,Venue,StartTime,Attendance,...,previous_game_away_win_loss,previous_game_home_position,previous_game_home_points,previous_game_home_percentage,previous_game_away_position,previous_game_away_points,previous_game_away_percentage,Day,Month,MinutesSinceMidnight
405,2013QF01,2013,-4,2013-09-06,14.4,9.7,3.0,1,1900-01-01 19:50:00,59615,...,0.0,1.0,76.0,135.7,4.0,62.0,132.5,4,9,1190
406,2013EF01,2013,-5,2013-09-07,18.9,7.1,0.0,1,1900-01-01 19:45:00,51722,...,0.0,6.0,56.0,115.0,7.0,48.0,102.4,5,9,1185
407,2013QF02,2013,-4,2013-09-07,17.9,6.3,0.0,8,1900-01-01 14:20:00,32458,...,0.0,2.0,72.0,135.6,3.0,66.0,134.1,5,9,860
408,2013EF02,2013,-5,2013-09-08,17.3,6.8,0.4,1,1900-01-01 15:20:00,94690,...,1.0,5.0,60.0,122.8,8.0,44.0,106.7,6,9,920
409,2013SF01,2013,-3,2013-09-13,15.6,0.1,1.4,1,1900-01-01 19:50:00,52744,...,1.0,,,,,,,4,9,1190
411,2013PF01,2013,-2,2013-09-20,18.1,9.4,2.2,1,1900-01-01 19:50:00,85569,...,1.0,,,,,,,4,9,1190
412,2013PF02,2013,-2,2013-09-21,21.6,9.5,2.0,3,1900-01-01 17:45:00,43249,...,1.0,,,,,,,5,9,1065
413,2013GF01,2013,-1,2013-09-28,15.7,6.3,2.7,1,1900-01-01 14:30:00,100007,...,1.0,,,,,,,5,9,870
414,2014R0101,2014,1,2014-03-14,27.6,13.5,0.0,4,1900-01-01 19:50:00,37571,...,,,,,,,,4,3,1190
415,2014R0102,2014,1,2014-03-15,29.5,17.4,0.0,10,1900-01-01 16:40:00,17102,...,,,,,,,,5,3,1000


In [184]:
# Forward-fill NaN values within each HomeTeam group
mainDF['homePosition'] = mainDF.groupby('HomeTeam')['homePosition'].ffill()
mainDF['homePercentage'] = mainDF.groupby('HomeTeam')['homePercentage'].ffill()
mainDF['homePoints'] = mainDF.groupby('HomeTeam')['homePoints'].ffill()

mainDF['awayPosition'] = mainDF.groupby('AwayTeam')['awayPosition'].ffill()
mainDF['awayPercentage'] = mainDF.groupby('AwayTeam')['awayPercentage'].ffill()
mainDF['awayPoints'] = mainDF.groupby('AwayTeam')['awayPoints'].ffill()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['homePosition'] = mainDF.groupby('HomeTeam')['homePosition'].ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['homePercentage'] = mainDF.groupby('HomeTeam')['homePercentage'].ffill()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['homePoints'] = mainDF.groupby('HomeTeam')

In [185]:
#Transform Betting Odds to Decimal Odds
mainDF['HomeProbability'] = 1 / mainDF['Home Odds']
mainDF['AwayProbability'] = 1 / mainDF['Away Odds']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['HomeProbability'] = 1 / mainDF['Home Odds']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mainDF['AwayProbability'] = 1 / mainDF['Away Odds']


# Removing Outliers

In [186]:
#Removing Years affected by COVID-19
mainDF = mainDF[(mainDF.Year != 2020)& (mainDF.Year != 2021)]

In [187]:
mainDF.to_csv(r'C:\Users\raadr\OneDrive\Desktop\AflAnalysis-\data\interim\fullSesData.csv', index=False)

In [188]:
# Create a new DataFrame for Home and Away Season
HADF = mainDF[mainDF.Round > 0]
HADF

Unnamed: 0,GameId,Year,Round,Date,MaxTemp,MinTemp,Rainfall,Venue,StartTime,Attendance,...,previous_game_home_points,previous_game_home_percentage,previous_game_away_position,previous_game_away_points,previous_game_away_percentage,Day,Month,MinutesSinceMidnight,HomeProbability,AwayProbability
1,2012R0102,2012,1,2012-03-29,25.7,9.7,0.0,1,1900-01-01 19:45:00,78285,...,,,,,,3,3,1185,0.403226,0.657895
2,2012R0103,2012,1,2012-03-30,27.4,9.7,0.0,1,1900-01-01 19:50:00,78466,...,,,,,,4,3,1190,0.606061,0.456621
3,2012R0104,2012,1,2012-03-31,29.1,15.1,0.6,1,1900-01-01 13:45:00,33473,...,,,,,,5,3,825,0.704225,0.355872
4,2012R0105,2012,1,2012-03-31,28.2,19.7,0.0,2,1900-01-01 15:45:00,12790,...,,,,,,5,3,945,0.227790,0.819672
5,2012R0106,2012,1,2012-03-31,24.0,9.0,0.0,3,1900-01-01 16:45:00,34601,...,,,,,,5,3,1005,0.510204,0.558659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2944,2023R2405,2023,24,2023-08-26,13.9,9.4,0.0,8,1900-01-01 19:25:00,20533,...,42.0,114.7,9.0,44.0,107.6,5,8,1165,0.411523,0.666667
2945,2023R2406,2023,24,2023-08-26,24.3,9.5,0.0,12,1900-01-01 18:10:00,47027,...,12.0,52.5,13.0,40.0,115.1,5,8,1090,0.230415,0.847458
2946,2023R2407,2023,24,2023-08-27,16.0,9.1,0.0,11,1900-01-01 12:00:00,39860,...,64.0,111.5,12.0,42.0,94.9,6,8,720,0.900901,0.171233
2947,2023R2408,2023,24,2023-08-27,20.8,9.8,0.0,7,1900-01-01 15:20:00,41753,...,50.0,111.6,4.0,60.0,124.8,6,8,920,0.574713,0.500000


In [189]:
HADF.to_csv(r'C:\Users\raadr\OneDrive\Desktop\AflAnalysis-\data\interim\H&AsesData.csv', index=False)