In [1]:
# Import dependencies
import pandas as pd
from pathlib import Path

In [2]:
# Location of the input CSV (ATMOSPHERIC_COND.csv). The path is relative, so it
# is resolved against the working directory the notebook is run from.
atmos_path = Path("../Datasets/ATMOSPHERIC_COND.csv")

In [3]:
# Parse the CSV into memory. low_memory=False tells pandas to process the file
# in one pass rather than in internal chunks, avoiding mixed-type inference.
atmos_data = pd.read_csv(
    filepath_or_buffer=atmos_path,
    low_memory=False,
)

In [4]:
# pd.read_csv already returns a DataFrame, so no conversion is required; bind
# the name the following cells use to the loaded table and preview its first rows.
atmos_data_df = atmos_data
atmos_data_df.head()

Unnamed: 0,ACCIDENT_NO,ATMOSPH_COND,ATMOSPH_COND_SEQ,Atmosph Cond Desc
0,T20060000010,1,1,Clear
1,T20060000018,1,1,Clear
2,T20060000022,1,1,Clear
3,T20060000023,1,1,Clear
4,T20060000026,1,1,Clear


In [5]:
# Count the distinct accidents present in the atmospheric-conditions table.
# dropna=False keeps the result identical to len(Series.unique()), which would
# count a missing ACCIDENT_NO as one distinct value.
unique_atmos_data = atmos_data_df["ACCIDENT_NO"].nunique(dropna=False)
unique_atmos_data

203708

In [6]:
# Inspect the dtype pandas inferred for each column (the result is a Series
# indexed by column name).
column_data_types = atmos_data_df.dtypes
column_data_types

ACCIDENT_NO          object
ATMOSPH_COND          int64
ATMOSPH_COND_SEQ      int64
Atmosph Cond Desc    object
dtype: object

In [7]:
# Per-column flag: True when the column holds at least one missing entry
columns_with_missing_values = atmos_data_df.isna().any(axis=0)

# Show the boolean flag for every column
columns_with_missing_values

ACCIDENT_NO          False
ATMOSPH_COND         False
ATMOSPH_COND_SEQ     False
Atmosph Cond Desc    False
dtype: bool

In [8]:
# Collapse the table to one row per accident: every 'Atmosph Cond Desc' value
# sharing an ACCIDENT_NO is joined into a single comma-separated string.
# The result column is renamed so its name contains underscores, not spaces.
# Built as one chain (no inplace=True) so re-running the cell is idempotent.
# NOTE(review): descriptions are joined in input row order; if the order must
# follow ATMOSPH_COND_SEQ, sort by that column first — TODO confirm.
grouped_atmos_data = (
    atmos_data_df
    .groupby("ACCIDENT_NO")["Atmosph Cond Desc"]
    .agg(", ".join)
    .reset_index()
    .rename(columns={"Atmosph Cond Desc": "Atmosph_Cond_Desc"})
)

# Display the resulting DataFrame
grouped_atmos_data

Unnamed: 0,ACCIDENT_NO,Atmosph_Cond_Desc
0,T20060000010,Clear
1,T20060000018,Clear
2,T20060000022,Clear
3,T20060000023,Clear
4,T20060000026,Clear
...,...,...
203703,T20200019239,Clear
203704,T20200019247,Clear
203705,T20200019250,Clear
203706,T20200019253,Clear


In [9]:
# Export the grouped table as CSV into the Cleaned_Datasets folder
cleaned_data_output_path = Path("../Cleaned_Datasets/atmospheric_cond_cleaned.csv")
# to_csv does not create missing directories, so make sure the target folder exists
cleaned_data_output_path.parent.mkdir(parents=True, exist_ok=True)
# index=False leaves the positional row index out of the file; header=True writes column names
grouped_atmos_data.to_csv(cleaned_data_output_path, index=False, header=True)