In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Read file into DataFrame
ufo_df = pd.read_csv('Resources/ufoSightings.csv', low_memory=False)

# Remove the rows with missing data
clean_ufo_df = ufo_df.dropna(how="any")

# Converting the "duration (seconds)" column's values to numeric
converted_ufo_df = clean_ufo_df.copy()
converted_ufo_df["duration (seconds)"] = converted_ufo_df.loc[:, "duration (seconds)"].astype(float)

# Change the 'date' column to a datetime object because resample will only work on datetime data types.
converted_ufo_df['datetime']= pd.to_datetime(converted_ufo_df['datetime'], errors='coerce')

# Drop the values that didn't get converted to a datetime format. 
converted_ufo_df = converted_ufo_df.dropna(subset=['datetime']).reset_index(drop=True)
converted_ufo_df.head(20)

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,2049-10-10 20:30:00,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/04,29.8830556,-97.941111
1,2056-10-10 21:00:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/04,28.9783333,-96.645833
2,2060-10-10 20:00:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/04,21.4180556,-157.803611
3,2061-10-10 19:00:00,bristol,tn,us,sphere,300.0,5 minutes,My father is now 89 my brother 52 the girl wit...,4/27/07,36.595,-82.188889
4,2065-10-10 23:45:00,norwalk,ct,us,disk,1200.0,20 minutes,A bright orange color changing to reddish colo...,10/2/99,41.1175,-73.408333
5,2066-10-10 20:00:00,pell city,al,us,disk,180.0,3 minutes,Strobe Lighted disk shape object observed clos...,3/19/09,33.5861111,-86.286111
6,2066-10-10 21:00:00,live oak,fl,us,disk,120.0,several minutes,Saucer zaps energy from powerline as my pregna...,5/11/05,30.2947222,-82.984167
7,2068-10-10 13:00:00,hawthorne,ca,us,circle,300.0,5 min.,ROUND &#44 ORANGE &#44 WITH WHAT I WOULD SAY W...,10/31/03,33.9163889,-118.351667
8,2068-10-10 19:00:00,brevard,nc,us,fireball,180.0,3 minutes,silent red /orange mass of energy floated by t...,6/12/08,35.2333333,-82.734444
9,2070-10-10 16:00:00,bellmore,ny,us,disk,1800.0,30 min.,silver disc seen by family and neighbors,5/11/00,40.6686111,-73.5275


In [3]:
# Get the columns.
converted_ufo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 66042 entries, 0 to 66041
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   datetime              66042 non-null  datetime64[ns]
 1   city                  66042 non-null  object        
 2   state                 66042 non-null  object        
 3   country               66042 non-null  object        
 4   shape                 66042 non-null  object        
 5   duration (seconds)    66042 non-null  float64       
 6   duration (hours/min)  66042 non-null  object        
 7   comments              66042 non-null  object        
 8   date posted           66042 non-null  object        
 9   latitude              66042 non-null  object        
 10  longitude             66042 non-null  float64       
dtypes: datetime64[ns](1), float64(2), object(8)
memory usage: 5.5+ MB


### Apply the `resample()` Function

In [4]:
# Create a pivot table with the 'datetime' as the index, the columns ='duration (seconds)', and the "sum" as the values.
ufo_pivot = pd.pivot_table(converted_ufo_df, 
                                            index=['datetime'],
                                            values='duration (seconds)',
                                            aggfunc='sum')
# Show the table.
ufo_pivot.head(20)

Unnamed: 0_level_0,duration (seconds)
datetime,Unnamed: 1_level_1
1973-01-01 23:30:00,30.0
1973-01-12 03:00:00,180.0
1973-01-14 19:00:00,10.0
1973-01-28 23:38:00,600.0
1973-02-01 22:00:00,300.0
1973-02-01 23:00:00,3.0
1973-02-02 21:00:00,90.0
1973-02-10 16:00:00,120.0
1973-03-01 01:00:00,600.0
1973-03-01 06:00:00,15.0


In [5]:
# Resample the pivot table into weekly bins 
# and get the average duration in seconds for each week rounded to one decimal place.


# Sort the resampled pivot table in ascending order on "duration (seconds)".


Unnamed: 0_level_0,duration (seconds)
datetime,Unnamed: 1_level_1
1991-09-15,16569600.0
1994-03-06,2637017.5
1984-03-18,1974630.0
2010-06-06,1562988.3
1993-03-07,1263144.0
2002-08-25,836482.3
2045-08-20,777600.0
2066-09-19,522692.1
1987-08-02,484080.0
2058-05-12,432000.0


In [6]:
# Resample the pivot table into monthly bins 
# and get the average duration in seconds for each month rounded to one decimal place.

# Sort the resampled pivot table in ascending order on "duration (seconds)".


Unnamed: 0_level_0,duration (seconds)
datetime,Unnamed: 1_level_1
1991-09-30,3316256.1
1984-03-31,878182.2
1993-03-31,703425.0
1994-03-31,659793.6
2045-08-31,388950.0
2010-06-30,338276.6
2058-05-31,216600.0
2066-09-30,215923.2
2002-08-31,175283.4
2012-08-31,96183.0


In [7]:
# Create a pivot table with the 'datetime' as the index, the columns ='shape', and the "count" as the values.
ufo_pivot_sum = pd.pivot_table(converted_ufo_df, 
                                            index=['datetime'],
                                            values='shape',
                                            aggfunc='count')
# Show the table.
ufo_pivot_sum.head(20)

Unnamed: 0_level_0,shape
datetime,Unnamed: 1_level_1
1973-01-01 23:30:00,1
1973-01-12 03:00:00,1
1973-01-14 19:00:00,1
1973-01-28 23:38:00,1
1973-02-01 22:00:00,1
1973-02-01 23:00:00,1
1973-02-02 21:00:00,1
1973-02-10 16:00:00,1
1973-03-01 01:00:00,1
1973-03-01 06:00:00,1


In [8]:
# Resample the pivot table into weekly bins and get the total number of sightings for each week.


# Sort the resampled pivot table in ascending order on "shape".


Unnamed: 0_level_0,shape
datetime,Unnamed: 1_level_1
2013-07-07,366
2012-07-08,317
2010-07-04,303
2014-01-05,236
2012-08-12,215
1999-11-21,208
2011-07-10,203
2009-09-20,198
2013-12-29,196
2012-11-11,190


In [9]:
# Resample the pivot table into monthly bins and get the total number of sightings for each month.


# Sort the resampled pivot table in ascending order on "shape".


Unnamed: 0_level_0,shape
datetime,Unnamed: 1_level_1
2013-07-31,794
2012-07-31,787
2013-08-31,746
2012-08-31,718
2010-07-31,690
2012-11-30,646
2013-10-31,639
2013-11-30,634
2013-09-30,629
2011-07-31,629


### Apply the `melt()` Function

In [10]:
# Read the book_sales.csv file into a DataFrame
book_sales_df = pd.read_csv('Resources/book_sales.csv')

# Pivot on the date_ending with the book_name as the index, and pass the "total_sales" as the values.
# Remove the index axis "date_ending".
book_sales_pivot = pd.pivot(book_sales_df, columns="date_ending",index="book_name",values="total_sales" ).rename_axis(None, axis=1)

# Reset the index so "book_name" is a column.
book_sales_reindexed = book_sales_pivot.reset_index()
book_sales_reindexed

Unnamed: 0,book_name,10/31/23,11/30/23,12/31/23,8/31/23,9/30/23
0,Foundation,75,50,125,60,100
1,Foundation and Earth,25,30,20,0,20
2,Foundation and Empire,75,60,75,40,50
3,Foundation's Edge,25,30,30,20,30
4,Second Foundation,50,100,50,35,40
5,The Fellowship of the Ring (The Lord of the Ri...,125,100,175,80,150
6,The Hobbit,200,200,250,150,100
7,"The Return of the King (The Lord of the Rings,...",200,50,200,100,125
8,"The Two Towers (The Lord of the Rings, Part 2)",225,100,150,100,75


In [11]:
# Convert the DataFrame from short form to long form. 
# Melt the DataFrame


Unnamed: 0,variable,value
0,book_name,Foundation
1,book_name,Foundation and Earth
2,book_name,Foundation and Empire
3,book_name,Foundation's Edge
4,book_name,Second Foundation
5,book_name,The Fellowship of the Ring (The Lord of the Ri...
6,book_name,The Hobbit
7,book_name,"The Return of the King (The Lord of the Rings,..."
8,book_name,"The Two Towers (The Lord of the Rings, Part 2)"
9,10/31/23,75


In [12]:
# Convert the DataFrame using the variable ("book_name") we'd like to keep in the long DataFrame.


Unnamed: 0,book_name,variable,value
0,Foundation,10/31/23,75
1,Foundation and Earth,10/31/23,25
2,Foundation and Empire,10/31/23,75
3,Foundation's Edge,10/31/23,25
4,Second Foundation,10/31/23,50
5,The Fellowship of the Ring (The Lord of the Ri...,10/31/23,125
6,The Hobbit,10/31/23,200
7,"The Return of the King (The Lord of the Rings,...",10/31/23,200
8,"The Two Towers (The Lord of the Rings, Part 2)",10/31/23,225
9,Foundation,11/30/23,50


In [14]:
# Convert the DataFrame and rename the columns to reflect the values. 


Unnamed: 0,book_name,date,total_sales
0,Foundation,10/31/23,75
1,Foundation and Earth,10/31/23,25
2,Foundation and Empire,10/31/23,75
3,Foundation's Edge,10/31/23,25
4,Second Foundation,10/31/23,50
5,The Fellowship of the Ring (The Lord of the Ri...,10/31/23,125
6,The Hobbit,10/31/23,200
7,"The Return of the King (The Lord of the Rings,...",10/31/23,200
8,"The Two Towers (The Lord of the Rings, Part 2)",10/31/23,225
9,Foundation,11/30/23,50


In [15]:
# Group the previous DataFrame on the date and show the total sales by the "date".


Unnamed: 0_level_0,total_sales
date,Unnamed: 1_level_1
10/31/23,1000
11/30/23,720
12/31/23,1075
8/31/23,585
9/30/23,690
