# Preprocessing

In [2]:
import pandas as pd

def add_date_columns(df):
    """Derive calendar columns from the frame's 'Date/Time' column.

    The frame is modified in place: 'Date/Time' is parsed to datetimes and
    'Date', 'Time' and 'Day of Week' columns are added. The frame is printed
    and then returned, so the call can also be used in an expression.

    Args:
        df: DataFrame with a 'Date/Time' column that ``pd.to_datetime`` can
            parse.

    Returns:
        The same DataFrame object that was passed in, with the derived
        columns added.

    Raises:
        KeyError: if ``df`` has no 'Date/Time' column.
    """
    df['Date/Time'] = pd.to_datetime(df['Date/Time'])

    timestamps = df['Date/Time'].dt
    df['Date'] = timestamps.date
    df['Time'] = timestamps.time

    # day_name() defaults to English names regardless of the process locale,
    # unlike strftime('%A'); it is also what process_datetime_columns uses.
    df['Day of Week'] = timestamps.day_name()

    print(df)
    return df

In [3]:
import os
def group_csv_files_by_folder(root_folder):
    """Load the CSV files of each immediate sub-folder into one DataFrame.

    Every directory directly under ``root_folder`` is scanned (not
    recursively) for files whose name ends in '.csv'. The files of one folder
    are read with ``pd.read_csv`` and concatenated row-wise with a fresh
    integer index.

    Args:
        root_folder: path of the directory whose sub-folders are scanned.

    Returns:
        dict mapping sub-folder name to its combined DataFrame. Sub-folders
        that contain no CSV files are omitted.

    Raises:
        FileNotFoundError: if ``root_folder`` does not exist.
    """
    folder_dataframes = {}

    # os.listdir returns entries in arbitrary order; sorting makes both the
    # dict order and the row order of each combined frame reproducible.
    for folder_name in sorted(os.listdir(root_folder)):
        folder_path = os.path.join(root_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        csv_paths = [
            os.path.join(folder_path, file_name)
            for file_name in sorted(os.listdir(folder_path))
            if file_name.endswith('.csv')
        ]
        # A directory that happens to be named '*.csv' is not readable data.
        csv_paths = [path for path in csv_paths if os.path.isfile(path)]

        if not csv_paths:
            # pd.concat raises ValueError on an empty list; a folder without
            # CSVs should not abort loading of all the others.
            continue

        folder_dataframes[folder_name] = pd.concat(
            [pd.read_csv(path) for path in csv_paths],
            ignore_index=True,
        )

    return folder_dataframes


In [4]:
# Load every sub-folder of the DisneyLand data directory into a dict of
# DataFrames keyed by sub-folder name.
root = 'data/DisneyLand'
folder_dataframes = group_csv_files_by_folder(root)
# NOTE(review): this prints every DataFrame in the dict, which produces a very
# long output; consider previewing a single frame with .head() instead.
print(folder_dataframes)

{'AliceInWon':                        Ride            Date/Time  Wait Time
0       Alice in Wonderland  2023-04-01 07:35:05          5
1       Alice in Wonderland  2023-04-01 07:40:06          5
2       Alice in Wonderland  2023-04-01 07:45:10          5
3       Alice in Wonderland  2023-04-01 07:50:11          5
4       Alice in Wonderland  2023-04-01 07:55:05          5
...                     ...                  ...        ...
189425  Alice in Wonderland  2024-09-08 23:56:02         10
189426  Alice in Wonderland  2024-09-08 23:57:02         10
189427  Alice in Wonderland  2024-09-08 23:58:02         10
189428  Alice in Wonderland  2024-09-08 23:59:01         10
189429  Alice in Wonderland  2024-09-09 00:00:02         10

[189430 rows x 3 columns], 'AstroOrbitor':                  Ride            Date/Time  Wait Time
0       Astro Orbitor  2023-04-01 07:35:05          5
1       Astro Orbitor  2023-04-01 07:40:06          5
2       Astro Orbitor  2023-04-01 07:45:10          5
3    

In [5]:
import os

def process_and_save_dataframes(folder_dataframes, save_path):
    """Write each DataFrame to ``<save_path>/<folder_name>.csv``.

    Before writing, the 'Ride' column is dropped (if present) and the
    'Wait Time' column (if present) is renamed to the folder name. The frames
    held in ``folder_dataframes`` are not modified. One confirmation line is
    printed per file written.

    Args:
        folder_dataframes: dict mapping folder name to DataFrame.
        save_path: output directory; created (with parents) if missing.

    Returns:
        None.
    """
    # exist_ok=True replaces the check-then-create pair, which can fail if the
    # directory appears between the two calls.
    os.makedirs(save_path, exist_ok=True)

    for folder_name, df in folder_dataframes.items():
        # drop(errors='ignore') and rename() both skip labels that are absent,
        # so no explicit column checks are needed. Both return new frames.
        ride_frame = (
            df.drop(columns=['Ride'], errors='ignore')
              .rename(columns={'Wait Time': folder_name})
        )

        csv_save_path = os.path.join(save_path, f"{folder_name}.csv")
        ride_frame.to_csv(csv_save_path, index=False)

        print(f"DataFrame for '{folder_name}' saved to '{csv_save_path}'")


# Example usage: write one CSV per loaded folder into data/Rides.
save_path = 'data/Rides'
process_and_save_dataframes(folder_dataframes, save_path)

DataFrame for 'AliceInWon' saved to 'data/Rides\AliceInWon.csv'
DataFrame for 'AstroOrbitor' saved to 'data/Rides\AstroOrbitor.csv'
DataFrame for 'Autopia' saved to 'data/Rides\Autopia.csv'
DataFrame for 'BuzzLightyear' saved to 'data/Rides\BuzzLightyear.csv'
DataFrame for 'CanalBoats' saved to 'data/Rides\CanalBoats.csv'
DataFrame for 'Carrousel' saved to 'data/Rides\Carrousel.csv'
DataFrame for 'CircusTrain' saved to 'data/Rides\CircusTrain.csv'
DataFrame for 'Dumbo' saved to 'data/Rides\Dumbo.csv'
DataFrame for 'Gadgetcoaster' saved to 'data/Rides\Gadgetcoaster.csv'
DataFrame for 'HauntedMansion' saved to 'data/Rides\HauntedMansion.csv'
DataFrame for 'IndianaJones' saved to 'data/Rides\IndianaJones.csv'
DataFrame for 'JungleCruise' saved to 'data/Rides\JungleCruise.csv'
DataFrame for 'Matterhorn' saved to 'data/Rides\Matterhorn.csv'
DataFrame for 'Mr.Toad' saved to 'data/Rides\Mr.Toad.csv'
DataFrame for 'Nemo' saved to 'data/Rides\Nemo.csv'
DataFrame for 'ParkAvg' saved to 'data/Rid

In [6]:
def process_datetime_columns(folder_dataframes, date_path):
    """Build and save a table of the distinct timestamps across all frames.

    The 'Date/Time' columns of every frame that has one are pooled,
    de-duplicated and parsed. The result gets 'Date', 'Time', 'Day of Week'
    and 'Weekend' columns, is written to ``date_path`` as CSV, and the
    absolute path of the file is printed.

    Args:
        folder_dataframes: dict mapping folder name to DataFrame; frames
            without a 'Date/Time' column are ignored.
        date_path: destination CSV path. Missing parent directories are
            created; a bare filename writes to the current directory.

    Returns:
        DataFrame with columns 'Date/Time', 'Date', 'Time', 'Day of Week'
        and 'Weekend', one row per distinct timestamp in first-seen order.
    """
    # os.path.dirname() is '' for a bare filename and os.makedirs('') raises,
    # so only create the directory when there is one to create.
    target_dir = os.path.dirname(date_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Collect first and concatenate once; concatenating inside the loop
    # re-copies the accumulated data on every iteration.
    datetime_parts = [
        df['Date/Time']
        for df in folder_dataframes.values()
        if 'Date/Time' in df.columns
    ]
    if datetime_parts:
        combined_datetime = pd.concat(datetime_parts, ignore_index=True)
    else:
        combined_datetime = pd.Series(dtype='datetime64[ns]')

    # De-duplicate the raw values first so only distinct values are parsed,
    # then again after parsing in case one instant appeared in two textual
    # forms.
    combined_datetime = (
        pd.to_datetime(combined_datetime.drop_duplicates())
        .drop_duplicates()
        .reset_index(drop=True)
    )

    df_combined = pd.DataFrame({'Date/Time': combined_datetime})
    df_combined['Date'] = combined_datetime.dt.date
    df_combined['Time'] = combined_datetime.dt.time
    df_combined['Day of Week'] = combined_datetime.dt.day_name()
    df_combined['Weekend'] = df_combined['Day of Week'].isin(['Saturday', 'Sunday'])

    df_combined.to_csv(date_path, index=False)

    absolute_path = os.path.abspath(date_path)
    print(f"DataFrame successfully saved to '{absolute_path}'")

    return df_combined

# Build the de-duplicated timestamp table and save it as
# data/Rides/combined_datetime.csv.
df_combined = process_datetime_columns(folder_dataframes, date_path='data/Rides/combined_datetime.csv')

# Preview only the first rows rather than the whole table.
print(df_combined.head())


DataFrame successfully saved to 'C:\Users\ethan\PycharmProjects\DisneyBot\data\Rides\combined_datetime.csv'
            Date/Time        Date      Time Day of Week  Weekend
0 2023-04-01 07:35:05  2023-04-01  07:35:05    Saturday     True
1 2023-04-01 07:40:06  2023-04-01  07:40:06    Saturday     True
2 2023-04-01 07:45:10  2023-04-01  07:45:10    Saturday     True
3 2023-04-01 07:50:11  2023-04-01  07:50:11    Saturday     True
4 2023-04-01 07:55:05  2023-04-01  07:55:05    Saturday     True


In [7]:
# Distinct calendar dates present in the combined timestamp table; each keeps
# the index of the first row on which it appeared.
unique_dates = df_combined['Date'].drop_duplicates()

# Order the dates chronologically (the original index labels are kept).
unique_dates_sorted = unique_dates.sort_values()

print(unique_dates_sorted)

65093     2023-01-01
65259     2023-01-02
65373     2023-01-03
65386     2023-01-04
65424     2023-01-05
             ...    
186305    2024-09-05
187193    2024-09-06
187935    2024-09-07
188729    2024-09-08
189426    2024-09-09
Name: Date, Length: 618, dtype: object
