In [1]:
# assignment: combining two datasets given the "time" column and then, converting the time values in the earthquakes.csv from its numerical representation of time (timestamp) 
import pandas as pd

# first step to load both the earthquake csv datasets through pandas
earthquakes = pd.read_csv('earthquakes.csv') # fully numerical time (UNIX timestamps)
earthquakes_2023_global = pd.read_csv('earthquakes_2023_global.csv') # standard date and time
    
# get an idea of each dataset's structure (mainly for time columns)
earthquakes.head(), earthquakes_2023_global.head()

(  alert  cdi      code                                             detail  \
 0   NaN  NaN  37389218  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 1   NaN  NaN  37389202  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 2   NaN  4.4  37389194  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 3   NaN  NaN  37389186  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 4   NaN  NaN  73096941  https://earthquake.usgs.gov/fdsnws/event/1/que...   
 
        dmin  felt    gap           ids   mag magType  ...  sources     status  \
 0  0.008693   NaN   85.0  ,ci37389218,  1.35      ml  ...     ,ci,  automatic   
 1  0.020030   NaN   79.0  ,ci37389202,  1.29      ml  ...     ,ci,  automatic   
 2  0.021370  28.0   21.0  ,ci37389194,  3.42      ml  ...     ,ci,  automatic   
 3  0.026180   NaN   39.0  ,ci37389186,  0.44      ml  ...     ,ci,  automatic   
 4  0.077990   NaN  192.0  ,nc73096941,  2.16      md  ...     ,nc,  automatic   
 
             time                   

In [2]:
# modifying the time format in earthquake.csv to be more readable
earthquakes['time'] = pd.to_datetime(earthquakes['time'], unit = 'ms')

# doing the same for earthquakes_2023_global.csv to ensure both have the same time format
earthquakes_2023_global['time'] = pd.to_datetime(earthquakes_2023_global['time'])

# next merging the two time columns
merged_earthquakes = pd.concat([earthquakes, earthquakes_2023_global], ignore_index = True)

# displaying the merged columns to confirm
merged_earthquakes['time'].head()

0    2018-10-13 23:59:28.010000
1    2018-10-13 23:58:49.610000
2    2018-10-13 23:57:42.610000
3    2018-10-13 23:56:18.070000
4    2018-10-13 23:51:56.050000
Name: time, dtype: object

In [3]:
# importing calendar to create a full calendar with earthquake markers
import calendar

# using the given code while changing bits of it in a lot of ways
def earthquake_calendar(year, month):
    earthquakes_in_month = merged_earthquakes[
        (merged_earthquakes['time'].apply(lambda x: x.year) == year) &
        (merged_earthquakes['time'].apply(lambda x: x.month) == month)
    ]
    
    # fetching the data on days when earthquakes occurred
    earthquake_days = earthquakes_in_month['time'].apply(lambda x: x.day).unique() 
    
    first_weekday, days_in_month = calendar.monthrange(year, month)
    
    day_counter = 1
    output_str = f"{calendar.month_name[month]} {year}\n"
    output_str += "Mo Tu We Th Fr Sa Su\n"
    
    # adding spacing for the first week
    output_str += "   " * first_weekday
    
    # loop over each day of the month
    while day_counter <= days_in_month:
        for weekday in range(first_weekday, 7): 
            if day_counter > days_in_month:
                break  # to break the loop once the last day is reached
            
            # 'X' gets printed for earthquake days, otherwise the day number
            if day_counter in earthquake_days:
                output_str += " X "
            else:
                output_str += f"{day_counter:2d} "
            
            # move to the next line after Saturday
            if (weekday + 1) % 7 == 0:
                output_str += "\n"
            
            day_counter += 1
        first_weekday = 0

    return output_str

# testing with the given month and year!
print(earthquake_calendar(2024, 2))

February 2024
Mo Tu We Th Fr Sa Su
          1  2  3  4 
 5  6  7  8  9 10 11 
12 13 14 15 16 17 18 
19 20 21 22 23 24 25 
26 27 28 29 


Output shows that there were no earthquakes that occurred in February 2024 because no "X" appeared on the calendar. However, when running it again on December 2023 this time, there were multiple occurrences of earthquakes throughout that month as the "X" appeared everywhere except on a Saturday and a Sunday, 30th and 31st, respectively. Filtering data through the apply(lambda x:) for both year and month culminated such code to extract the year and month from the time column of merged_earthquakes dataset. To be honest, this was very intriguing for me.