In [1]:
import pandas as pd
import datetime as dt
from sqlalchemy import create_engine

ufo_file = "scrubbed.csv"
eclipse_file = "lunar.csv"

ufo_df = pd.read_csv(ufo_file)
ufo_df.rename(columns={'longitude ': 'longitude'}, inplace=True)
ufo_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611


In [2]:
# convert latitude object to float
ufo_df['latitude'] = pd.to_numeric(ufo_df['latitude'], errors='coerce')

In [3]:
# round latitude and longitude
ufo_df['longitude'] = ufo_df['longitude'].round()
ufo_df['latitude'] = ufo_df['latitude'].round()

In [4]:
# write function to add directional indicators to lng
def longitude_number_to_longitude_string(longitude):

   longitude_letter = ''
   if (longitude>0):
       longitude_letter = 'E'
   else:
       longitude_letter = 'W'
   return f'{abs(longitude):.0f}{longitude_letter}'

In [5]:
# Add longitude directionals column
ufo_df['longitude_string'] = ufo_df.longitude.map(longitude_number_to_longitude_string)

In [6]:
# drop the original longitude column
ufo_df.drop('longitude', axis=1, inplace=True)

In [7]:
# write function to add directional indicators to lat
def latitude_number_to_latitude_string(latitude):

   latitude_letter = ''
   if (latitude>0):
       latitude_letter = 'N'
   else:
       latitude_letter = 'S'
   return f'{abs(latitude):.0f}{latitude_letter}'

In [8]:
# Add latitude directionals column
ufo_df['latitude_string'] = ufo_df.latitude.map(latitude_number_to_latitude_string)

In [9]:
# drop the original latitude column
ufo_df.drop('latitude', axis=1, inplace=True)

In [10]:
eclipse_df = pd.read_csv(eclipse_file)
eclipse_df.head()

Unnamed: 0,Catalog Number,Calendar Date,Eclipse Time,Delta T (s),Lunation Number,Saros Number,Eclipse Type,Quincena Solar Eclipse,Gamma,Penumbral Magnitude,Umbral Magnitude,Latitude,Longitude,Penumbral Eclipse Duration (m),Partial Eclipse Duration (m),Total Eclipse Duration (m)
0,1,-1999 June 26,14:13:28,46437,-49456,17,N,t-,-1.0981,0.8791,-0.1922,24S,22W,268.8,-,-
1,2,-1999 November 21,20:23:49,46427,-49451,-16,N,-a,-1.1155,0.8143,-0.1921,15N,98W,233.4,-,-
2,3,-1998 May 17,05:47:36,46416,-49445,-11,P,-t,0.8988,1.2105,0.2069,13S,89E,281.7,102.7,-
3,4,-1998 November 11,05:15:58,46404,-49439,-6,P,-a,-0.4644,2.0382,0.974,12N,113E,343.4,200.8,-
4,5,-1997 May 6,18:57:01,46392,-49433,-1,T+,pp,0.1003,2.6513,1.6963,11S,92W,322.8,213.5,98.2


In [11]:
date_format = '%m/%d/%Y %H:%M'
final_format = '%Y/%m/%d'
df = ufo_df.head()
times = []
for index, row in ufo_df.iterrows():
    if '24:00' in row['datetime']:
        row['datetime'] = row['datetime'].replace('24:00', '00:00')
    try:
        times.append(dt.datetime.strptime(row['datetime'],date_format))
    except Exception as e:
        print(e)
        
ufo_df['time'] = [t.strftime(final_format) for t in times]
ufo_df.drop(columns=['datetime', 'date posted'], inplace=True)
ufo_df.head()

Unnamed: 0,city,state,country,shape,duration (seconds),duration (hours/min),comments,longitude_string,latitude_string,time
0,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,98W,30N,1949/10/10
1,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,99W,29N,1949/10/10
2,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,3W,53N,1955/10/10
3,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,97W,29N,1956/10/10
4,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,158W,21N,1960/10/10


In [12]:
date_format = '%Y %B %d'
time_format = '%H:%M:%S'
final_format = '%Y/%m/%d'
times = []
for index, row in eclipse_df.iterrows():
    calendar_date = row['Calendar Date'][1:]

    try:
        if calendar_date.split(' ')[0] == '' :
            eclipse_df.drop([index], inplace=True)
            continue
        elif int(calendar_date.split(' ')[0]) < 1900:
            eclipse_df.drop([index], inplace=True)
            continue
        times.append(dt.datetime.strptime(calendar_date + ' ' + row['Eclipse Time'],date_format + ' ' + time_format))
    except Exception as e:
        print(e)
        
eclipse_df['time'] = [t.strftime(final_format) for t in times]
eclipse_clean_df = eclipse_df.drop(columns=['Calendar Date', 'Eclipse Time'])
eclipse_clean_df.head()

Unnamed: 0,Catalog Number,Delta T (s),Lunation Number,Saros Number,Eclipse Type,Quincena Solar Eclipse,Gamma,Penumbral Magnitude,Umbral Magnitude,Latitude,Longitude,Penumbral Eclipse Duration (m),Partial Eclipse Duration (m),Total Eclipse Duration (m),time
0,1,46437,-49456,17,N,t-,-1.0981,0.8791,-0.1922,24S,22W,268.8,-,-,1999/06/26
1,2,46427,-49451,-16,N,-a,-1.1155,0.8143,-0.1921,15N,98W,233.4,-,-,1999/11/21
2,3,46416,-49445,-11,P,-t,0.8988,1.2105,0.2069,13S,89E,281.7,102.7,-,1998/05/17
3,4,46404,-49439,-6,P,-a,-0.4644,2.0382,0.974,12N,113E,343.4,200.8,-,1998/11/11
4,5,46392,-49433,-1,T+,pp,0.1003,2.6513,1.6963,11S,92W,322.8,213.5,98.2,1997/05/06


In [13]:
# Create a connection string:
connection_string = f'root@localhost'
engine = create_engine(f'mysql://root@localhost')

In [14]:
# Create a database and "use" it to go any further:
engine.execute("create database if not exists lunar_ufo_db")
engine.execute("use lunar_ufo_db")

<sqlalchemy.engine.result.ResultProxy at 0x2161be2c160>

In [15]:
eclipse_clean_df.head()

Unnamed: 0,Catalog Number,Delta T (s),Lunation Number,Saros Number,Eclipse Type,Quincena Solar Eclipse,Gamma,Penumbral Magnitude,Umbral Magnitude,Latitude,Longitude,Penumbral Eclipse Duration (m),Partial Eclipse Duration (m),Total Eclipse Duration (m),time
0,1,46437,-49456,17,N,t-,-1.0981,0.8791,-0.1922,24S,22W,268.8,-,-,1999/06/26
1,2,46427,-49451,-16,N,-a,-1.1155,0.8143,-0.1921,15N,98W,233.4,-,-,1999/11/21
2,3,46416,-49445,-11,P,-t,0.8988,1.2105,0.2069,13S,89E,281.7,102.7,-,1998/05/17
3,4,46404,-49439,-6,P,-a,-0.4644,2.0382,0.974,12N,113E,343.4,200.8,-,1998/11/11
4,5,46392,-49433,-1,T+,pp,0.1003,2.6513,1.6963,11S,92W,322.8,213.5,98.2,1997/05/06


In [16]:
ufo_df.head()

Unnamed: 0,city,state,country,shape,duration (seconds),duration (hours/min),comments,longitude_string,latitude_string,time
0,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,98W,30N,1949/10/10
1,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,99W,29N,1949/10/10
2,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,3W,53N,1955/10/10
3,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,97W,29N,1956/10/10
4,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,158W,21N,1960/10/10


In [17]:
eclipse_clean_df.to_sql(
    con=engine,
    name="eclipses_clean", 
    if_exists="append", 
    index=False)

In [18]:
ufo_df.to_sql(
    con=engine,
    name="ufos", 
    if_exists="append", 
    index=False)

In [19]:
engine.execute("use lunar_ufo_db")
max_min = engine.execute("SELECT MAX(time) as max, MIN(time) as min FROM eclipses_clean")
for x in max_min:
    maximum = x[0]
    minimum = x[1]
engine.execute(f"""CREATE OR REPLACE VIEW non_eclipse_sightings AS
SELECT COUNT(u.comments) as count
FROM ufos u
LEFT JOIN eclipses_clean e ON e.time = u.time 
WHERE e.`Catalog Number` IS NULL AND u.time >= '{minimum}' AND u.time <= '{maximum}'
GROUP BY u.time""")
print(maximum)
print(minimum)
non_count = engine.execute("SELECT SUM(count) FROM non_eclipse_sightings")
for x in non_count:
    non_eclipse_count = x[0]
    print(x[0])

1999/11/21
1900/02/15
45522


In [20]:
engine.execute("use lunar_ufo_db")
engine.execute(f"""CREATE OR REPLACE VIEW eclipse_sightings AS
SELECT COUNT(u.comments) as count
FROM ufos u
LEFT JOIN eclipses_clean e ON e.time = u.time 
WHERE e.`Catalog Number` IS NOT NULL AND u.time >= '{minimum}' AND u.time <= '{maximum}'
GROUP BY u.time""")

count = engine.execute("SELECT SUM(count) FROM eclipse_sightings")
for x in count:
    eclipse_count = x[0]
    print(x[0])

594


In [21]:
engine.execute("use lunar_ufo_db")
days = engine.execute("SELECT datediff(MAX(time), MIN(time)) FROM ufos")
for x in days:
    total_days = x[0]
    print(x[0])

39260


In [22]:
engine.execute("use lunar_ufo_db")
days = engine.execute("SELECT COUNT(DISTINCT(time)) FROM eclipses_clean")
for x in days:
    eclipse_days = x[0]
    print(x[0])

242


In [25]:
days_without_eclipse = total_days - eclipse_days

sightings_per_day_normal = non_eclipse_count / days_without_eclipse
sightings_per_day_eclipse = eclipse_count / eclipse_days

print(f"sightings per day without eclipse: {sightings_per_day_normal}")
print(f"sightings per day with eclipse: {sightings_per_day_eclipse}")

sightings per day without eclipse: 1.166692295863447639550976472
sightings per day with eclipse: 2.454545454545454545454545455
