In [11]:
import pandas as pd
import os

# Remote locations of the three raw CSV files (GitHub raw content).
btp_url = "https://raw.githubusercontent.com/GitAIwithMike/ATC-Models/refs/heads/main/notebooks/BTP.csv"
agc_url = "https://raw.githubusercontent.com/GitAIwithMike/ATC-Models/refs/heads/main/notebooks/AGC.csv"
weather_stats_url = "https://raw.githubusercontent.com/GitAIwithMike/ATC-Models/refs/heads/main/notebooks/weather_stats.csv"


def _read_remote_csv(url):
    """Download the CSV at *url* and return it as a DataFrame.

    ``low_memory=False`` makes pandas read the file in one pass instead of
    in chunks, so column dtypes are inferred from the whole column.
    """
    return pd.read_csv(url, low_memory=False)


btp_data = _read_remote_csv(btp_url)
agc_data = _read_remote_csv(agc_url)
weather_stats_data = _read_remote_csv(weather_stats_url)

# BTP and AGC share a 'valid' column; parse it to datetime in place.
# Values that cannot be parsed become NaT (errors='coerce') rather than
# raising. No utc flag is passed here, unlike the weather timestamps below.
for frame in (btp_data, agc_data):
    frame['valid'] = pd.to_datetime(frame['valid'], errors='coerce')

# The weather-stats timestamp is taken from the second '_'-separated token
# of 'video_name' and parsed as "YYYY-mm-dd-HH-MM-SS". The result is
# timezone-aware UTC; tokens that do not match the format become NaT.
video_timestamp_text = weather_stats_data['video_name'].str.split('_').str[1]
weather_stats_data['video_name_datetime'] = pd.to_datetime(
    video_timestamp_text,
    format="%Y-%m-%d-%H-%M-%S",
    errors='coerce',
    utc=True,
)

# Make sure the output directory is there before writing into it.
os.makedirs("notebooks", exist_ok=True)

# Write each cleaned frame next to the others, without the index column.
cleaned_outputs = (
    (btp_data, "notebooks/cleaned_BTP.csv"),
    (agc_data, "notebooks/cleaned_AGC.csv"),
    (weather_stats_data, "notebooks/cleaned_weather_stats.csv"),
)
for frame, destination in cleaned_outputs:
    frame.to_csv(destination, index=False)

print("Data cleaned and saved!")

Data cleaned and saved!
