# UFO Sightings Analysis

## Load the CSV file
### Noah Code Starts here

In [None]:
# Import any needed functionality
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
#import gmaps

# This will hide any of the small pink messages
import warnings
warnings.filterwarnings("ignore")

# Import API Key
#from config import g_key

In [None]:
# Relative location of the US sightings CSV
file = "UFO_Data/US_UFO_Sightings.csv"

# Parse the CSV with pandas
us_ufo_data = pd.read_csv(filepath_or_buffer=file)

In [None]:
# Wrap the loaded data in a DataFrame and preview its first five rows
us_ufo_df = pd.DataFrame(data=us_ufo_data)
us_ufo_df.head(n=5)

In [None]:
# Drop the stray "Unnamed: 0" column that accidentally came through in the cleaned file
# (presumably an index that was saved along with the data - confirm against the cleaning step)
us_ufo_data = us_ufo_df.drop("Unnamed: 0", axis="columns")
us_ufo_data.head()

## Initial Analysis

### Learning more about our data

In [None]:
# How many sightings are recorded?
# The number of rows in the dataframe is the number of recorded sightings
print(us_ufo_data.shape[0])

In [None]:
# What range does the "date" column cover?
# NOTE(review): min/max compare the column as stored; if "date" holds strings the
# ordering is alphabetical rather than chronological - confirm the column dtype.
us_ufo_min_year, us_ufo_max_year = us_ufo_data["date"].min(), us_ufo_data["date"].max()

# Report the earliest and latest values found
print(f"This dataset ranges from sightings recorded in {us_ufo_min_year} to {us_ufo_max_year}")

In [None]:
# Collect every distinct value that appears in the "shape" column
alien_cars = us_ufo_data.loc[:, "shape"].unique()
alien_cars

### Where do aliens like to visit?

In [None]:
# Which state is visited the most?
# Bucket the sightings by state so each bucket can be counted
state_group = us_ufo_data.groupby(by="state")

# Sightings per state: the number of non-null "time" entries in each bucket
state_visits = state_group["time"].count()

In [None]:
# Get the names of all of the different states to be used on the x axis
states = us_ufo_data["state"].unique()

# Put the names in alphabetical order so they line up with the tick locations
states_alph = list(states)
states_alph.sort()

In [None]:
# Generate a bar plot showing the total number of documented sightings in each state using pyplot.
# Bar positions and tick labels are both derived from state_visits (the per-state
# counts indexed by state), so every bar is labelled with the state it was counted for.
x_axis = np.arange(len(state_visits))

# One tick per bar
tick_locations = list(x_axis)

# Change the size of the chart to make it more readable
plt.figure(figsize=(15, 5))

# Plot the bar chart
plt.bar(x_axis, state_visits, align="center")
plt.xticks(tick_locations, state_visits.index, rotation=90)

# Add a legend naming the plotted series
plt.legend(["Visits"], loc="upper center")

# Create labels for the bar plot
plt.title("Total Documented Sightings per State")
plt.xlabel("State")
plt.ylabel("Number of Sightings in the State")

# Set limits: a little padding on either side of the bars and headroom above the tallest bar
plt.xlim(-1, len(state_visits) + 0.5)
plt.ylim(0, state_visits.max() + 1000)

# Best practice to always include plt.show
# Even though it is not technically needed in jupyter notebooks
plt.show()

In [None]:
# Generate a pie plot showing the distribution of visits across the states
# Labels for the sections of our pie chart: taken from the index of state_visits
# so each wedge is named after the state its count belongs to
labels = state_visits.index.values

# Wide figure so the many state labels have room
plt.figure(figsize=(35, 7))

# Tell matplotlib to create a pie chart annotated with each state's percentage,
# with the first wedge starting at 90 degrees
plt.pie(state_visits, labels=labels, autopct="%1.1f%%", startangle=90)
plt.title("Distribution of UFO Sightings for each state")

# Display resulting plot
plt.tight_layout()
plt.show()

In [None]:
# Which city is visited the most within the most visited state?

# What city is visited the most regardless of state visits? (Better vacation spot?)

In [None]:
# US heatmap of alien visits over the years
# NOTE(review): `import gmaps` and `from config import g_key` are commented out in the
# import cell at the top of the notebook; both must be restored before this cell can run.
gmaps.configure(api_key=g_key)

# Take the "Latitude" and "Longitude" columns as the locations for the heatmap
locations = us_ufo_data.loc[:, ["Latitude", "Longitude"]]

# Show the per-state visit counts being considered as the weight variable
# MIGHT HAVE TO MAKE A NEW ONE FOR CITY COUNTS IDK??
state_visits

In [None]:
# Generate the map
# From class discussion, it is important to set a center, and a zoom level to help make the map display properly
# Got the center and zoom_level values through trial and error
# NOTE(review): (25.0, 15.0) is not inside the US - confirm this is the intended center
fig = gmaps.figure(center=(25.0, 15.0), zoom_level=1.8)

# Generate a new layer on the map - Heat layer
# No weights are passed: gmaps expects one weight per location, but state_visits holds
# one count per state while locations holds one row per sighting. Leaving the points
# unweighted lets every sighting contribute once, so the heat reflects sighting density.
heat_layer = gmaps.heatmap_layer(
    locations,
    dissipating=False,
    max_intensity=100,
    point_radius=3,
)

# Add layer
fig.add_layer(heat_layer)

# Display figure
fig

### What season do aliens like to vacation to earth?

In [None]:
# Create a new column that separates the months into seasons
# See which season has the most visits

### Best time for cow abduction?

In [None]:
# Scatter plot to see if the time of day has any correlation with total number of visits

### Do they keep coming back?

In [None]:
# See if the total number of sightings per year has changed?
# Scatter plot - DO WE NEED TO PULL YEAR TO ITS OWN COLUMN??

### Noah Code Ends here

## Deeper Analysis
### Mahnoor Code Starts here

### Regression or hypothesis testing

In [None]:
# Regression possibilities

#    Can we predict the number of total sightings for 2016?
#    What city will have the highest visits in 2016?
#    Can we predict what region an alien is most likely to visit on their next vacation to earth?

In [None]:
# Hypothesis testing possibilities

#    We believe that aliens prefer to visit at night, and that sightings will most likely happen between 10pm and 2am
#    We think US excitement around aliens peaks around Halloween, so the majority of sightings will be in the fall (Sept - Nov)
#    We believe the midwest, not the southwest (area 51) will have the most sightings (More cows to abduct!!)
#    Is area 51 (Nevada and surrounding area) truly the hub of all recorded alien activity??

In [None]:
## We could potentially look at the 2016 csv we almost used to see how accurate our predictions were??
## Idk if that would be a stretch or like bad to compare (apples to oranges?)

### Mahnoor Code Ends here