In [None]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="tornados")
import pandas as pd

In [None]:
# Load the raw tornado records from the CSV file in the working directory.
tornados_df = pd.read_csv(filepath_or_buffer='./tornados.csv')

In [None]:
# Rename the abbreviated column headers to descriptive names.
tornados_df.rename(
    mapper={
        'yr': 'Year',
        'mo': 'Month',
        'dy': 'Day of Month',
        'date': 'Date',
        'st': 'State',
        'mag': 'Magnitude on F/EF scale',
        'inj': 'Number of injuries',
        'fat': 'Number of fatalities',
        'slat': 'Starting latitude',
        'slon': 'Starting longitude',
        'elat': 'Ending latitude',
        'elon': 'Ending longitude',
        'len': 'Track length in miles',
        'wid': 'Tornado width in yards',
    },
    axis='columns',
    inplace=True,
)
# Preview the first rows with the new headers.
tornados_df.head()

Unnamed: 0,Year,Month,Day of Month,Date,State,Magnitude on F/EF scale,Number of injuries,Number of fatalities,Starting latitude,Starting longitude,Ending latitude,Ending longitude,Track length in miles,Tornado width in yards
0,1950,1,3,03/01/1950,IL,3,3,0,39.1,-89.3,39.12,-89.23,3.6,130
1,1950,1,3,03/01/1950,MO,3,3,0,38.77,-90.22,38.83,-90.03,9.5,150
2,1950,1,3,03/01/1950,OH,1,1,0,40.88,-84.58,0.0,0.0,0.1,10
3,1950,1,13,13/01/1950,AR,3,1,1,34.4,-94.37,0.0,0.0,0.6,17
4,1950,1,25,25/01/1950,IL,2,0,0,41.17,-87.33,0.0,0.0,0.1,100


In [None]:
# Convert imperial measurements to metric: miles -> kilometres, yards -> metres.
# Each conversion runs only while its source column still exists, so re-running
# this cell neither raises KeyError nor scales already-converted values again.
MILES_TO_KM = 1.60934
YARDS_TO_METERS = 0.9144

unit_conversions = [
    # (source column, converted column, multiplication factor)
    ('Track length in miles', 'Track length in KM', MILES_TO_KM),
    ('Tornado width in yards', 'Tornado width in meters', YARDS_TO_METERS),
]
for source_col, target_col, factor in unit_conversions:
    if source_col in tornados_df.columns:
        # Convert in place first, then rename, so the column keeps its position.
        tornados_df[source_col] = (tornados_df[source_col] * factor).round(2)
        tornados_df.rename(columns={source_col: target_col}, inplace=True)

tornados_df.head()


Unnamed: 0,Year,Month,Day of Month,Date,State,Magnitude on F/EF scale,Number of injuries,Number of fatalities,Starting latitude,Starting longitude,Ending latitude,Ending longitude,Track length in KM,Tornado width in meters
0,1950,1,3,03/01/1950,IL,3,3,0,39.1,-89.3,39.12,-89.23,5.79,118.87
1,1950,1,3,03/01/1950,MO,3,3,0,38.77,-90.22,38.83,-90.03,15.29,137.16
2,1950,1,3,03/01/1950,OH,1,1,0,40.88,-84.58,0.0,0.0,0.16,9.14
3,1950,1,13,13/01/1950,AR,3,1,1,34.4,-94.37,0.0,0.0,0.97,15.54
4,1950,1,25,25/01/1950,IL,2,0,0,41.17,-87.33,0.0,0.0,0.16,91.44


In [None]:
# Only keep tornadoes that tracked 5 km or more.
# The explicit .copy() makes the result an independent frame, so adding columns
# to it in later cells does not trigger pandas' SettingWithCopyWarning.
MIN_TRACK_KM = 5
tornados_df = tornados_df.loc[tornados_df['Track length in KM'] >= MIN_TRACK_KM].copy()

In [None]:
from IPython.display import clear_output

# Record the frame's dimensions; `rows` is the total that progress() divides by.
rows = tornados_df.shape[0]
columns = tornados_df.shape[1]
print(rows)

16913


In [None]:
# Generate starting-location information.
# Note: this cell is estimated to take up to 2 hours to run.
# Counter of rows handled so far; progress() reads and increments it.
i = 0

def progress():
  """Count one more processed row and redraw the progress readout.

  Reads the module-level ``rows`` (total number of rows) and increments the
  module-level ``i`` (rows processed so far), then replaces the cell output
  with a single ``Progress: NN.NN%`` line.
  """
  global i
  # Count the row before printing so the readout reflects completed work and
  # the final call shows 100% instead of stopping one row short.
  i = i + 1
  # Guard against an empty frame so the readout never divides by zero.
  percent = (i / rows) * 100 if rows else 100.0
  clear_output(wait=True)
  print(f"Progress: {percent:.2f}%")

def get_location(row, lat_col="Starting latitude", lon_col="Starting longitude",
                 retries=2, retry_delay=1.0):
    """Reverse-geocode one row's coordinates into a comma-joined address string.

    Args:
        row: Mapping/Series holding the latitude and longitude values.
        lat_col: Key of the latitude value in ``row``.
        lon_col: Key of the longitude value in ``row``.
        retries: Extra attempts made after a geocoder service error.
        retry_delay: Base pause in seconds between attempts; it grows linearly
            with the attempt number.

    Returns:
        ``"road,hamlet,county,state,postcode"`` built from the geocoder's
        ``address`` dict, with ``" "`` standing in for any missing key, or
        ``""`` when no lookup was made or no address came back.

    Raises:
        geopy.exc.GeocoderServiceError: if every attempt fails.
    """
    import time
    from geopy.exc import GeocoderServiceError

    lat, lon = row[lat_col], row[lon_col]

    # NOTE(review): 0.0/0.0 looks like the dataset's placeholder for an
    # unrecorded position (see the ending columns in the preview) - confirm.
    # Such rows, and NaN coordinates, skip the network round trip entirely.
    has_coords = not (pd.isna(lat) or pd.isna(lon) or (lat == 0 and lon == 0))

    location = None
    if has_coords:
        # Retry service errors so that one failed request does not abort the
        # whole multi-hour run; re-raise once the attempts are used up.
        # NOTE(review): the public Nominatim service has a usage policy that
        # limits request rate - verify this loop complies with it.
        for attempt in range(retries + 1):
            try:
                location = geolocator.reverse(f"{lat}, {lon}")
                break
            except GeocoderServiceError:
                if attempt == retries:
                    raise
                time.sleep(retry_delay * (attempt + 1))

    # Exactly one progress tick per row, whether or not a lookup happened.
    progress()

    if location and "address" in location.raw:
        address = location.raw["address"]
        # NOTE(review): missing keys default to " ", so filter(None, ...) only
        # drops values the geocoder returned as empty - confirm this is the
        # intended output format before changing it.
        location_info = [
            address.get("road", " "),
            address.get("hamlet", " "),
            address.get("county", " "),
            address.get("state", " "),
            address.get("postcode", " ")
        ]
        return ",".join(filter(None, location_info))
    return ""

# Run get_location on every row and store the results in a new column.
tornados_df["Starting location"] = tornados_df.apply(func=get_location, axis="columns")

Progress: 99.9%


In [None]:
# Generate ending-location information.
# Note: this cell is estimated to take up to 2 hours to run.
# Reset the counter of rows handled so far; progress() reads and increments it.
i = 0

def progress():
  """Count one more processed row and redraw the progress readout.

  Reads the module-level ``rows`` (total number of rows) and increments the
  module-level ``i`` (rows processed so far), then replaces the cell output
  with a single ``Progress: NN.NN%`` line.
  """
  global i
  # Count the row before printing so the readout reflects completed work and
  # the final call shows 100% instead of stopping one row short.
  i = i + 1
  # Guard against an empty frame so the readout never divides by zero.
  percent = (i / rows) * 100 if rows else 100.0
  clear_output(wait=True)
  print(f"Progress: {percent:.2f}%")

def get_location(row, lat_col="Ending latitude", lon_col="Ending longitude",
                 retries=2, retry_delay=1.0):
    """Reverse-geocode one row's coordinates into a comma-joined address string.

    This definition replaces the earlier ``get_location`` and defaults to the
    ending-coordinate columns.

    Args:
        row: Mapping/Series holding the latitude and longitude values.
        lat_col: Key of the latitude value in ``row``.
        lon_col: Key of the longitude value in ``row``.
        retries: Extra attempts made after a geocoder service error.
        retry_delay: Base pause in seconds between attempts; it grows linearly
            with the attempt number.

    Returns:
        ``"road,hamlet,county,state,postcode"`` built from the geocoder's
        ``address`` dict, with ``" "`` standing in for any missing key, or
        ``""`` when no lookup was made or no address came back.

    Raises:
        geopy.exc.GeocoderServiceError: if every attempt fails.
    """
    import time
    from geopy.exc import GeocoderServiceError

    lat, lon = row[lat_col], row[lon_col]

    # NOTE(review): the preview shows ending coordinates of 0.0/0.0, which looks
    # like the dataset's placeholder for an unrecorded end point - confirm.
    # Such rows, and NaN coordinates, skip the network round trip entirely.
    has_coords = not (pd.isna(lat) or pd.isna(lon) or (lat == 0 and lon == 0))

    location = None
    if has_coords:
        # Retry service errors so that one failed request does not abort the
        # whole multi-hour run; re-raise once the attempts are used up.
        # NOTE(review): the public Nominatim service has a usage policy that
        # limits request rate - verify this loop complies with it.
        for attempt in range(retries + 1):
            try:
                location = geolocator.reverse(f"{lat}, {lon}")
                break
            except GeocoderServiceError:
                if attempt == retries:
                    raise
                time.sleep(retry_delay * (attempt + 1))

    # Exactly one progress tick per row, whether or not a lookup happened.
    progress()

    if location and "address" in location.raw:
        address = location.raw["address"]
        # NOTE(review): missing keys default to " ", so filter(None, ...) only
        # drops values the geocoder returned as empty - confirm this is the
        # intended output format before changing it.
        location_info = [
            address.get("road", " "),
            address.get("hamlet", " "),
            address.get("county", " "),
            address.get("state", " "),
            address.get("postcode", " ")
        ]
        return ",".join(filter(None, location_info))
    return ""

# Run get_location on every row and store the results in a new column.
tornados_df["Ending location"] = tornados_df.apply(func=get_location, axis="columns")

Progress: 5.92%


In [None]:
# Write the enriched table to an Excel workbook in the working directory.
tornados_df.to_excel(excel_writer='tornados.xlsx')