In [1]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
from datetime import date
import seaborn as sns
import calendar
# Work from the folder that holds the monthly trip CSV files.
# The location can be overridden with the CITIBIKE_DATA_DIR environment
# variable so the notebook is not tied to one machine; the original
# absolute path remains the default.
DATA_DIR = os.environ.get(
    "CITIBIKE_DATA_DIR",
    r"C:\Users\Henry\Desktop\Project2_HtmlSite\bike trip data csv",
)
os.chdir(DATA_DIR)

In [2]:
# Gather the names of all files in the working directory that end in ".csv".
extension = 'csv'
all_filenames = list(glob.glob('*.' + extension))

In [3]:
# Combine every source trip file into one DataFrame and export it.
# The export is written into the same folder that was globbed, so on a
# re-run the previous export can itself appear in all_filenames; it is
# skipped here so that trips are not concatenated twice.
combined_name = "combined_csv.csv"
source_files = [f for f in all_filenames if os.path.basename(f) != combined_name]
if not source_files:
    # pd.concat would raise a less helpful ValueError on an empty list.
    raise ValueError("No source CSV files found to combine in " + os.getcwd())
combined_csv = pd.concat([pd.read_csv(f) for f in source_files])
# Export without the index; 'utf-8-sig' writes a byte-order mark at the start of the file.
combined_csv.to_csv(combined_name, index=False, encoding='utf-8-sig')

In [None]:
# Name of the merged trip file, relative to the working directory.
csv_path = "combined_csv.csv"

# Read the merged trips into a DataFrame and preview the first rows.
citibike_df = pd.read_csv(filepath_or_buffer=csv_path, encoding="utf-8")
citibike_df.head()

In [None]:
# Copy the relevant columns (trip timing, start location, rider attributes)
# into a new, independent DataFrame.
relevant_columns = [
    'tripduration', 'starttime', 'stoptime',
    'start station latitude', 'start station longitude',
    'usertype', 'birth year', 'gender',
]
citibike = citibike_df.loc[:, relevant_columns].copy()
citibike.head()

In [None]:
# Decode gender per the Citibike data: 0 = Unknown, 1 = Male, 2 = Female.
# The decoded Series is assigned back to the column. Calling
# replace(..., inplace=True) on citibike['gender'] is chained assignment:
# it warns on recent pandas and leaves the frame unchanged under Copy-on-Write.
citibike['gender'] = citibike['gender'].replace({0: 'Unknown', 1: 'Male', 2: 'Female'})
# # convert tripduration from seconds to minutes
# citibike['tripduration'] = (citibike['tripduration'] / 60).round(2)
# Overwrite 'birth year' with (birth year - 2019), i.e. the rider's age in
# 2019 with a negative sign.
# NOTE: this rewrites the column, so running the cell a second time without
# reloading the data subtracts 2019 again.
citibike['birth year'] = citibike['birth year'] - 2019
citibike.head()

In [None]:
# Flip the sign of the (birth year - 2019) values so they become positive,
# and store the result in a new 'birth year2' column.
citibike['birth year2'] = citibike['birth year'].mul(-1)
citibike.head()

In [None]:
# Parse the trip start and stop timestamps into datetime values.
for timestamp_column in ('starttime', 'stoptime'):
    citibike[timestamp_column] = pd.to_datetime(citibike[timestamp_column])
citibike.head()

In [None]:
# Rename the positive-age column from 'birth year2' to 'age' in a new frame.
citibike2 = citibike.rename({'birth year2': 'age'}, axis='columns')
citibike2.head()

In [None]:
# Narrow the frame to the columns listed below; 'stoptime' and the
# intermediate 'birth year' column are left out.
kept_columns = [
    'tripduration', 'starttime',
    'start station latitude', 'start station longitude',
    'usertype', 'gender', 'age',
]
citibike2 = citibike2.loc[:, kept_columns].copy()
citibike2.head()

In [None]:
# Keep only the rows whose age is below 100.
# .copy() makes citibike_final an independent frame, so assigning columns on
# it later does not raise SettingWithCopyWarning.
# NOTE(review): the cells that follow keep analysing citibike2 rather than
# citibike_final, so this filter does not reach the charts — confirm intent.
citibike_final = citibike2.loc[citibike2["age"] < 100].copy()
citibike_final.head()

In [None]:
# Run the filtered frame's start times through pd.to_datetime and store them back.
parsed_start = pd.to_datetime(citibike_final['starttime'])
citibike_final['starttime'] = parsed_start
citibike_final.head()

In [None]:
# Split the start timestamp into day of month, month name, clock time and
# weekday number (Monday = 0), each stored in its own column.
start_parts = citibike2['starttime'].dt
citibike2['starttime - Day'] = start_parts.day
citibike2['starttime - Month'] = start_parts.month_name()
citibike2['starttime - Time'] = start_parts.time
citibike2['starttime - Weekday'] = start_parts.weekday
citibike2.head()

In [None]:
# Replace weekday numbers (Monday = 0 ... Sunday = 6) with day names.
# The result is assigned back to the column. Calling replace(..., inplace=True)
# on citibike2['starttime - Weekday'] is chained assignment: it warns on
# recent pandas and leaves the frame unchanged under Copy-on-Write.
weekday_names = {
    0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday',
    4: 'Friday', 5: 'Saturday', 6: 'Sunday',
}
citibike2['starttime - Weekday'] = citibike2['starttime - Weekday'].replace(weekday_names)
citibike2.head()

In [None]:
# Drop the raw 'starttime' column ('stoptime' is not part of this frame).
# Reassigning with errors="ignore" instead of dropping in place keeps the
# cell re-runnable: a second run no longer raises KeyError for the
# already-removed column.
citibike2 = citibike2.drop(columns=["starttime"], errors="ignore")
citibike2.head()

In [None]:
# Count trips per weekday.
# rename_axis / reset_index(name=...) set the column names explicitly, so the
# result does not depend on the default names value_counts().reset_index()
# produces, which differ between pandas 1.x and 2.x.
weekdaytrips_df = (
    citibike2['starttime - Weekday']
    .value_counts()
    .rename_axis('Weekday')
    .reset_index(name='Count')
)
# Derive the sort key from each row's weekday name. A hard-coded tuple of
# numbers is only right for one particular frequency ordering and fails
# outright when fewer than seven weekdays are present.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekdaytrips_df["Weekday Number"] = weekdaytrips_df['Weekday'].map(
    {day: number for number, day in enumerate(weekday_order)}
)
weekdaytrips_df = weekdaytrips_df.sort_values('Weekday Number', ascending=True)
weekdaytrips_df

In [None]:
# Bar chart of total trips per weekday.
# The default figure size is set before plotting so that it applies to this
# figure as well as to later ones.
plt.rcParams['figure.figsize'] = (10,6)
weekday_ax = weekdaytrips_df.plot.bar(x='Weekday', y='Count')
# The x axis shows weekdays, so it is labelled 'Weekday' (it previously read 'Month').
weekday_ax.set_xlabel('Weekday', fontweight="bold", fontsize=16)
weekday_ax.set_ylabel('Count', fontweight="bold", fontsize=16)
weekday_ax.set_title('Total Weekday Bike Trips', fontweight="bold", fontsize=20)
# Keep a handle on the figure so it can be saved afterwards.
fig1 = weekday_ax.get_figure()

In [None]:
# Save the weekday chart. The output folder is created first so savefig does
# not fail when ./Images does not exist yet.
os.makedirs("./Images", exist_ok=True)
fig1.savefig("./Images/WeekdayCount.png")

In [None]:
# Count trips per month.
# rename_axis / reset_index(name=...) set the column names explicitly, so the
# result does not depend on the default names value_counts().reset_index()
# produces, which differ between pandas 1.x and 2.x.
monthlytrips_df = (
    citibike2['starttime - Month']
    .value_counts()
    .rename_axis('Month')
    .reset_index(name='Count')
)
# Derive the calendar month number from each row's month name. A hard-coded
# tuple of numbers is only right for one particular frequency ordering and
# for exactly seven months of data.
month_order = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
monthlytrips_df["Month Number"] = monthlytrips_df['Month'].map(
    {month: number for number, month in enumerate(month_order, start=1)}
)
monthlytrips_df = monthlytrips_df.sort_values('Month Number', ascending=True)
monthlytrips_df

In [None]:
# Bar chart of total trips per month.
# The default figure size is set before plotting so that it applies to this
# figure as well as to later ones.
plt.rcParams['figure.figsize'] = (10,6)
monthly_ax = monthlytrips_df.plot.bar(x='Month', y='Count', color="r")
monthly_ax.set_ylabel('Count', fontweight="bold", fontsize=16)
monthly_ax.set_xlabel('Month', fontweight="bold", fontsize=16)
monthly_ax.set_title('Total Monthly Bike Trips', fontweight="bold", fontsize=20)
# Keep a handle on the figure so it can be saved afterwards
# (this rebinds fig1, which earlier held the weekday chart).
fig1 = monthly_ax.get_figure()

In [None]:
# Save the monthly chart. The output folder is created first so savefig does
# not fail when ./Images does not exist yet.
os.makedirs("./Images", exist_ok=True)
fig1.savefig("./Images/MonthlyBikeTrips.png")

In [None]:
# Count trips per rider gender.
# The columns are named explicitly: the previous rename looked for a column
# called "Gender", but the counted Series is named "gender", so the count
# column was never renamed to 'Count'.
gender_df = (
    citibike2["gender"]
    .value_counts()
    .rename_axis('Gender')
    .reset_index(name='Count')
)
gender_df.head()

In [None]:
# Inputs for the pie chart of trips by gender (Male, Female, Unknown).
# Labels for the sections of the pie chart
labels = ["Male", "Female", "Unknown",]

# Pie chart values, taken from the data in label order instead of being
# typed in by hand, so they stay correct when the input files change.
# A label with no rows gets a size of 0.
sizes = citibike2["gender"].value_counts().reindex(labels, fill_value=0).tolist()

# The colors of each section of the pie chart
colors = ["#66b3ff", "#ff9999", "#99ff99"]

# Offsets that pull the "Female" and "Unknown" sections away from the centre;
# "Male" stays in place.
explode = (0, .1,.19)

In [None]:
# Draw the gender pie chart on its own figure, so it can never land on a
# figure left over from an earlier cell.
fig5, gender_ax = plt.subplots()
# autopct makes matplotlib compute and print each section's percentage.
gender_ax.pie(sizes, explode=explode, labels=labels, colors=colors,
              autopct="%1.1f%%", shadow=True, startangle=200)
# The values are trip counts by rider gender, so the title says so
# (it previously read "Total Fatalities Sorted By Gender").
gender_ax.set_title("Total Bike Trips By Gender", fontweight="bold", fontsize=20)
# Equal axes so the pie is drawn as a circle; the trailing semicolon keeps
# the notebook from echoing the returned axis limits.
gender_ax.axis("equal");

In [None]:
# Save the gender pie chart. This must use fig5 (the pie); fig1 refers to
# the monthly bar chart, which was being written to this file by mistake.
# The output folder is created first so savefig does not fail when ./Images
# does not exist yet.
# NOTE(review): the file name still says "Fatalities"; it is left unchanged
# in case other files reference it — confirm before renaming.
os.makedirs("./Images", exist_ok=True)
fig5.savefig("./Images/TotalFatalitiesSortedbyGender.png")

In [None]:
# Preview the rows of the earlier frame whose 'birth year2' value is below 100.
age_under_100 = citibike["birth year2"] < 100
agecheck = citibike.loc[age_under_100]
agecheck.head()