# Data analysis for Cycle Share Data set 


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df_station = pd.read_csv('station.csv')
df_station.keys()

Index(['station_id', 'name', 'lat', 'long', 'install_date',
       'install_dockcount', 'modification_date', 'current_dockcount',
       'decommission_date'],
      dtype='object')

In [3]:
df_weather = pd.read_csv('weather.csv')
df_weather.keys()

Index(['Date', 'Max_Temperature_F', 'Mean_Temperature_F', 'Min_TemperatureF',
       'Max_Dew_Point_F', 'MeanDew_Point_F', 'Min_Dewpoint_F', 'Max_Humidity',
       'Mean_Humidity', 'Min_Humidity', 'Max_Sea_Level_Pressure_In',
       'Mean_Sea_Level_Pressure_In', 'Min_Sea_Level_Pressure_In',
       'Max_Visibility_Miles', 'Mean_Visibility_Miles', 'Min_Visibility_Miles',
       'Max_Wind_Speed_MPH', 'Mean_Wind_Speed_MPH', 'Max_Gust_Speed_MPH',
       'Precipitation_In', 'Events'],
      dtype='object')

In [4]:
df_trip = pd.read_csv('trip.csv', error_bad_lines=False)
df_trip.keys()

b'Skipping line 50794: expected 12 fields, saw 20\n'


Index(['trip_id', 'starttime', 'stoptime', 'bikeid', 'tripduration',
       'from_station_name', 'to_station_name', 'from_station_id',
       'to_station_id', 'usertype', 'gender', 'birthyear'],
      dtype='object')

### What is the average trip duration for a borrowed bicycle?

In [5]:
df_trip['tripduration'].mean()

1178.2956753399776

### What’s the most common age of a bicycle-sharer?

In [7]:
2020 - df_trip['birthyear'].mode()[0]

33.0

### Given all the weather data here, find the average precipitation per month, and the median precipitation.

In [40]:

def to_month(cell):
    return int(cell.split('/')[0])
df_weather['Month']=df_weather['Date'].apply(to_month)
df_weather['Month']

0      10
1      10
2      10
3      10
4      10
       ..
684     8
685     8
686     8
687     8
688     8
Name: Month, Length: 689, dtype: int64

In [41]:
df_weather.groupby('Month')['Precipitation_In'].mean()


Month
1     0.143548
2     0.168421
3     0.156935
4     0.051333
5     0.012419
6     0.030500
7     0.012097
8     0.018226
9     0.041000
10    0.189000
11    0.187833
12    0.236290
Name: Precipitation_In, dtype: float64

In [42]:
df_weather.groupby('Month')['Precipitation_In'].median()


Month
1     0.020
2     0.040
3     0.025
4     0.000
5     0.000
6     0.000
7     0.000
8     0.000
9     0.000
10    0.040
11    0.035
12    0.100
Name: Precipitation_In, dtype: float64

### What’s the average number of bikes at a given bike station?


In [66]:
with_decommission_date = df_station[df_station['decommission_date'].isnull()]


In [67]:
with_decommission_date['current_dockcount'].mean()

17.74074074074074

### When a bike station is modified, is it more likely that it’ll lose bikes or gain bikes? How do you know?

In [70]:
new = df_station[['modification_date','install_dockcount','current_dockcount']].dropna() #remove any row with no value
new

Unnamed: 0,modification_date,install_dockcount,current_dockcount
7,11/9/2015,20,18
10,8/9/2016,16,0
12,2/24/2015,18,20
17,2/24/2015,28,26
22,3/24/2015,30,24
23,3/27/2015,12,16
26,3/18/2016,16,0
31,2/24/2015,18,20
35,3/13/2015,12,20
37,2/23/2015,18,16


In [71]:
new_current_dockcount = new['current_dockcount'].sum()
new_install_dockcount = new['install_dockcount'].sum()
new_current_dockcount-new_install_dockcount

-64

### What is the avg max tempreture for every month ?

In [72]:

df_weather['Month']=df_weather['Date'].apply(to_month)
df_weather.groupby('Month')['Max_Temperature_F'].mean()


Month
1     50.516129
2     55.157895
3     58.387097
4     63.900000
5     69.258065
6     76.233333
7     79.629032
8     79.790323
9     70.266667
10    64.760000
11    52.866667
12    50.241935
Name: Max_Temperature_F, dtype: float64

### What is the maximum five temperature at all days ?

In [75]:
df_weather.sort_values('Max_Temperature_F', ascending=False).head()[['Date','Max_Temperature_F']]

Unnamed: 0,Date,Max_Temperature_F
279,7/19/2015,98
676,8/19/2016,95
263,7/3/2015,93
291,7/31/2015,93
682,8/25/2016,93


### What is the station name where install_dockcount is 18?


In [78]:
new_station_last = df_station[df_station['install_dockcount'] == 18]
new_station_last['name']

0                                    3rd Ave & Broad St
5                                    Union St & 4th Ave
9                                     2nd Ave & Pine St
12                       E Harrison St & Broadway Ave E
16                                 E Pine St & 16th Ave
19                             Bellevue Ave & E Pine St
21                                9th Ave N & Mercer St
24                         E Blaine St & Fairview Ave E
25                        Eastlake Ave E & E Allison St
29    Occidental Park / Occidental Ave S & S Washing...
30    King Street Station Plaza / 2nd Ave Extension ...
31                           REI / Yale Ave N & John St
32                              Dexter Ave N & Aloha St
33                       Republican St & Westlake Ave N
34                        PATH / 9th Ave & Westlake Ave
37            Lake Union Park / Valley St & Boren Ave N
42    Burke-Gilman Trail / NE Blakeley St & 24th Ave NE
43                       NE 42nd St & University