In [1]:
import pandas as pd
# Read the whole CSV into memory; later cells filter this frame by Year/Month.
amtrackData = pd.read_csv('amtrackData.csv')

# Print the column names and dtypes as a quick schema check.
amtrackData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4800241 entries, 0 to 4800240
Data columns (total 25 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   Unnamed: 0.1             int64  
 1   Unnamed: 0               int64  
 2   Train ID                 int64  
 3   Year                     int64  
 4   Month                    int64  
 5   Day                      int64  
 6   Station Code             object 
 7   Schedule Arrival Day     object 
 8   Schedule Arrival Time    object 
 9   Schedule Departure Day   object 
 10  Schedule Departure Time  object 
 11  Actual Arrival Time      object 
 12  Actual Departure Time    object 
 13  X                        float64
 14  Y                        float64
 15  OBJECTID                 float64
 16  STNCODE                  object 
 17  STNNAME                  object 
 18  ADDRESS1                 object 
 19  ADDRESS2                 object 
 20  CITY                     object 
 21  STATE   

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import imageio
import os
import gc

# Colour palette shared by every rendered frame.
route_color = "#1f77b4"       # blue: line segments drawn between consecutive stops
stop_color = "#ff7f0e"        # orange: stop markers
background_color = "#f7f7f7"  # light grey: axes face colour

def save_gif_for_month(images, month_num, year):
    """Assemble frame images into one animated GIF and delete the frames.

    Parameters
    ----------
    images : iterable of str
        Paths of the frame image files, in playback order.
    month_num : int
        Month number, used only in the output filename and log message.
    year : int
        Year, used only in the output filename and log message.

    Returns
    -------
    None
        The GIF is written to ``time_lapse_{year}_{month_num}.gif`` in the
        current working directory. Nothing is written when ``images`` is
        empty.
    """
    # Materialise once: the frames are iterated twice (write, then delete).
    images = list(images)

    if not images:
        # Avoid creating a frame-less GIF and reporting it as saved.
        print(f'No frames to save for {year} month {month_num}')
        return

    gif_name = f'time_lapse_{year}_{month_num}.gif'
    with imageio.get_writer(gif_name, mode='I', duration=0.5) as writer:
        for filename in images:
            writer.append_data(imageio.imread(filename))

    # Frames are removed only after the writer closed without raising, so a
    # failed GIF export leaves the PNGs on disk for a retry.
    for filename in images:
        os.remove(filename)
    print(f'Saved GIF for {year} month {month_num} as {gif_name}')

def _stop_timestamps(frame, year, month, column):
    """Combine each row's ``Day`` with the clock time in ``column`` into a timestamp."""
    day_text = frame['Day'].astype(str)
    return pd.to_datetime(f'{year}-{month:02}-' + day_text + ' ' + frame[column])


def _render_hour_frame(active_stops, year, month, day, hour):
    """Draw the map for one hourly window, save it as a PNG and return the filename."""
    fig, ax = plt.subplots(figsize=(12, 12))
    try:
        ax.set_facecolor(background_color)

        m = Basemap(projection='merc', llcrnrlat=24, urcrnrlat=49, llcrnrlon=-125, urcrnrlon=-66, resolution='i', ax=ax)
        # NOTE(review): the original comment said a larger scale reduces
        # resolution; Basemap appears to treat `scale` as an image resize
        # factor, so 1.5 may upsample instead. Confirm the intended value.
        m.shadedrelief(scale=1.5)
        m.drawcoastlines(color='#bdc3c7')
        m.drawcountries(linewidth=0.5, color='#2c3e50')
        m.drawstates(linewidth=0.3, color='#34495e')

        # One group per train, in order of first appearance; stops inside a
        # group are ordered by arrival so consecutive rows form a segment.
        for _, train_stops in active_stops.groupby('Train ID', sort=False):
            train_stops = train_stops.sort_values('Actual Arrival Time')
            # Project the whole train once instead of once per segment.
            xs, ys = m(train_stops['X'].to_numpy(), train_stops['Y'].to_numpy())
            for i in range(len(train_stops) - 1):
                ax.plot(xs[i:i + 2], ys[i:i + 2], '-o', markersize=7, color=route_color, alpha=0.8, linewidth=2, zorder=3)

        x, y = m(active_stops['X'].values, active_stops['Y'].values)
        m.scatter(x, y, s=40, color=stop_color, marker='H', edgecolor='white', linewidth=0.5, zorder=5)

        font_title = {'family': 'sans-serif', 'color':  '#2c3e50', 'weight': 'bold', 'size': 17}
        ax.set_title(f"Trains Active from {hour:02}:00 to {(hour+1)%24:02}:00 on {day:02} {month:02} {year}", fontdict=font_title, pad=20)

        fig.tight_layout()
        filename = f"slice_{year}_{month:02}_{day:02}_{hour:02}.png"
        fig.savefig(filename)
    finally:
        # Close this specific figure even if drawing or saving failed, so a
        # long run does not accumulate open figures.
        plt.close(fig)
    return filename


def process_month(month, year, data=None):
    """Render one map frame per hour of a month and stitch them into a GIF.

    Rows for the requested month are selected, their actual arrival and
    departure clock times are turned into full timestamps, and for every
    hour of the month the rows whose arrival-to-departure interval touches
    that hour are drawn on a map. The resulting PNG frames are handed to
    ``save_gif_for_month``.

    Parameters
    ----------
    month : int
        Calendar month (1-12) to render.
    year : int
        Calendar year to render.
    data : pandas.DataFrame, optional
        Source rows. Must provide the columns ``Year``, ``Month``, ``Day``,
        ``Train ID``, ``X``, ``Y``, ``Actual Arrival Time`` and
        ``Actual Departure Time``. Defaults to the notebook-level
        ``amtrackData`` frame.

    Returns
    -------
    None
        Prints a message and returns early when no rows match.
    """
    if data is None:
        data = amtrackData

    # Only get necessary data for the current month and year. The explicit
    # copy makes the column assignments below operate on an independent frame
    # rather than on a slice of the source (no SettingWithCopyWarning).
    in_month = (data['Year'] == year) & (data['Month'] == month)
    filtered_data = data.loc[in_month].copy()

    if filtered_data.empty:
        print(f"No data for {year} month {month}")
        return

    filtered_data['Actual Arrival Time'] = _stop_timestamps(filtered_data, year, month, 'Actual Arrival Time')
    filtered_data['Actual Departure Time'] = _stop_timestamps(filtered_data, year, month, 'Actual Departure Time')

    # A departure earlier than the arrival on the same date is treated as a
    # departure on the following day.
    next_day_departure = filtered_data['Actual Departure Time'] < filtered_data['Actual Arrival Time']
    filtered_data.loc[next_day_departure, 'Actual Departure Time'] += pd.Timedelta(days=1)

    arrival = filtered_data['Actual Arrival Time']
    departure = filtered_data['Actual Departure Time']

    images = []
    days_in_month = pd.Timestamp(year, month, 1).days_in_month
    for day in range(1, days_in_month + 1):
        for hour in range(24):
            start_time = pd.Timestamp(year, month, day, hour, 0)
            end_time = start_time + pd.Timedelta(hours=1)

            arrives_in_window = (arrival >= start_time) & (arrival < end_time)
            departs_in_window = (departure > start_time) & (departure <= end_time)
            spans_window = (arrival <= start_time) & (departure >= end_time)
            subset = filtered_data[arrives_in_window | departs_in_window | spans_window]

            images.append(_render_hour_frame(subset, year, month, day, hour))

            # Manual garbage collection: the per-frame map objects are large.
            gc.collect()

    save_gif_for_month(images, month, year)


# Render one time-lapse GIF for every (year, month) combination below.
years_to_process = [2019, 2020, 2021, 2022]
months_to_process = list(range(1, 13))

for year in years_to_process:
    for month in months_to_process:
        process_month(month, year)
