# Divvy Bike Share bike rides in Chicago during the month of July in 2021
[Data Divvy Bike Share Chicago 2021 July](https://www.kaggle.com/maxglover/divvy-bike-share-chicago-2021-july?select=202107-divvy-tripdata.csv)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly 
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as py
from plotly.offline import iplot
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

In [2]:
# Load the Divvy trip export from the local db/ folder; the trailing bare
# expression makes the notebook render the resulting frame.
DATA_PATH = 'db/divvy-trip-2021-july.csv'
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,0A1B623926EF4E16,docked_bike,2021-07-02 14:44:36,2021-07-02 15:19:58,Michigan Ave & Washington St,13001,Halsted St & North Branch St,KA1504000117,41.883984,-87.624684,41.899368,-87.648480,casual
1,B2D5583A5A5E76EE,classic_bike,2021-07-07 16:57:42,2021-07-07 17:16:09,California Ave & Cortez St,17660,Wood St & Hubbard St,13432,41.900363,-87.696704,41.889899,-87.671473,casual
2,6F264597DDBF427A,classic_bike,2021-07-25 11:30:55,2021-07-25 11:48:45,Wabash Ave & 16th St,SL-012,Rush St & Hubbard St,KA1503000044,41.860384,-87.625813,41.890173,-87.626185,member
3,379B58EAB20E8AA5,classic_bike,2021-07-08 22:08:30,2021-07-08 22:23:32,California Ave & Cortez St,17660,Carpenter St & Huron St,13196,41.900363,-87.696704,41.894556,-87.653449,member
4,6615C1E4EB08E8FB,electric_bike,2021-07-28 16:08:06,2021-07-28 16:27:09,California Ave & Cortez St,17660,Elizabeth (May) St & Fulton St,13197,41.900350,-87.696682,41.886593,-87.658387,casual
...,...,...,...,...,...,...,...,...,...,...,...,...,...
822405,1E660BF8DCDAAAB6,electric_bike,2021-07-04 10:40:41,2021-07-04 11:30:13,,,,,41.720000,-87.680000,41.770000,-87.660000,member
822406,A2448BDFD9B3653D,electric_bike,2021-07-04 12:47:41,2021-07-04 12:54:46,,,,,41.770000,-87.660000,41.780000,-87.660000,member
822407,2D612BF8530379DA,electric_bike,2021-07-03 21:41:58,2021-07-03 21:57:14,,,,,41.720000,-87.680000,41.720000,-87.680000,member
822408,6D615D18B765C9B3,electric_bike,2021-07-03 22:10:31,2021-07-03 22:11:39,,,,,41.720000,-87.680000,41.720000,-87.680000,member


# Observando los datos

In [3]:
# Print the column names, non-null counts and dtypes to check how each column was parsed.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 822410 entries, 0 to 822409
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   ride_id             822410 non-null  object 
 1   rideable_type       822410 non-null  object 
 2   started_at          822410 non-null  object 
 3   ended_at            822410 non-null  object 
 4   start_station_name  735147 non-null  object 
 5   start_station_id    735148 non-null  object 
 6   end_station_name    729252 non-null  object 
 7   end_station_id      729252 non-null  object 
 8   start_lat           822410 non-null  float64
 9   start_lng           822410 non-null  float64
 10  end_lat             821679 non-null  float64
 11  end_lng             821679 non-null  float64
 12  member_casual       822410 non-null  object 
dtypes: float64(4), object(9)
memory usage: 81.6+ MB


Podemos observar que la mayoría de los tipos de datos están correctos, excepto los campos **started_at** y **ended_at**, que deberían ser de tipo datetime.

Ahora vamos a convertirlos a tipo datetime.

In [4]:
# Convert both ride timestamp columns from strings (object dtype) to datetimes.
for ts_col in ('started_at', 'ended_at'):
    df[ts_col] = pd.to_datetime(df[ts_col])

In [5]:
# Count the missing values in each column.
df.isna().sum()

ride_id                   0
rideable_type             0
started_at                0
ended_at                  0
start_station_name    87263
start_station_id      87262
end_station_name      93158
end_station_id        93158
start_lat                 0
start_lng                 0
end_lat                 731
end_lng                 731
member_casual             0
dtype: int64

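Podemos observar que existen varios valores nulos en las columnas de estaciones (**start_station_name**, **start_station_id**, **end_station_name**, **end_station_id**) y, en menor medida, en las coordenadas de destino (**end_lat**, **end_lng**).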

In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,start_lat,start_lng,end_lat,end_lng
count,822410.0,822410.0,821679.0,821679.0
mean,41.903565,-87.645536,41.903815,-87.645662
std,0.043153,0.026856,0.043289,0.027065
min,41.648501,-87.84,41.63,-87.85
25%,41.88338,-87.65966,41.88338,-87.659753
50%,41.900219,-87.64117,41.90096,-87.64117
75%,41.929143,-87.627691,41.929505,-87.62768
max,42.07,-87.52,42.15,-87.49


# Análisis

## Tipo de paseo

In [7]:
# Number of rides per bike type, shown as shares of a pie chart.
ride_type = df['rideable_type'].value_counts()
fig = px.pie(
    ride_type,
    names=ride_type.index,    # slice labels: the bike types
    values=ride_type.values,  # slice sizes: rides per bike type
    title='Tipo de paseos',
)
fig.show()

## Uso de los usuarios

### Uso de usuarios sin membresía

In [19]:
# Helper column of 1s: summing it inside a groupby yields the number of rows per
# group. Later cells that aggregate on 'ones' rely on this column existing in df.
df['ones'] = 1

# Ride counts per bike type for casual (non-member) users only.
member_casual = (
    df[df['member_casual'] == 'casual']
    .groupby(['member_casual', 'rideable_type'])[['ones']]
    .sum()
    .reset_index()
)
member_casual

Unnamed: 0,member_casual,rideable_type,ones
0,casual,classic_bike,241489
1,casual,docked_bike,57698
2,casual,electric_bike,142869


In [34]:
# Bar chart of ride counts per bike type for casual (non-member) users.
fig = px.bar(
    member_casual,
    x='rideable_type',
    y='ones',
    color='rideable_type',
    title='Usuario sin membresia',
    labels={'rideable_type': 'Tipo de paseo', 'ones': 'Total'},
)
# Sort the bars from the largest total to the smallest.
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()

### Uso de usuarios con membresía

In [36]:
# Ride counts per bike type for users with a membership only.
# Sums the 'ones' helper column that an earlier cell added to df.
member = (
    df[df['member_casual'] == 'member']
    .groupby(['member_casual', 'rideable_type'])[['ones']]
    .sum()
    .reset_index()
)
member

Unnamed: 0,member_casual,rideable_type,ones
0,member,classic_bike,265420
1,member,electric_bike,114934


In [35]:
# Bar chart of ride counts per bike type for users with a membership.
fig = px.bar(
    member,
    x='rideable_type',
    y='ones',
    color='rideable_type',
    title='Usuario con membresia',
    labels={'rideable_type': 'Tipo de paseo', 'ones': 'Total'},
)
# Sort the bars from the largest total to the smallest.
fig.update_layout(xaxis_categoryorder='total descending')
fig.show()

### Conclusión

In [54]:
# Ride counts for every (user type, bike type) combination.
# Sums the 'ones' helper column that an earlier cell added to df.
all_user = (
    df.groupby(['member_casual', 'rideable_type'])[['ones']]
    .sum()
    .reset_index()
)
all_user

Unnamed: 0,member_casual,rideable_type,ones
0,casual,classic_bike,241489
1,casual,docked_bike,57698
2,casual,electric_bike,142869
3,member,classic_bike,265420
4,member,electric_bike,114934


In [51]:
# Bar chart of ride counts per bike type, with bars coloured by user type so
# members and casual users can be compared side by side in one figure.
fig = px.bar(
    all_user,
    x='rideable_type',
    y='ones',
    labels={'ones': 'Total', 'rideable_type': 'Tipo de paseo'},
    color='member_casual',
    # The figure covers both user types, so the title must not say "members" only
    # (the previous title was copied from the member-only chart).
    title='Usuarios con y sin membresía',
    # hover_data takes column names; passing a DataFrame slice is not a documented form.
    hover_data=['member_casual'],
)
# Sort the bike types from the largest combined total to the smallest.
fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

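- Tras analizar las gráficas se puede observar que las bicicletas clásicas son las más usadas por ambos tipos de usuario, y que los usuarios **con membresía** las usan más (265 420 paseos) que los usuarios sin membresía (241 489 paseos).
- Los usuarios **sin membresía** usan más las bicicletas eléctricas (142 869 paseos) que los usuarios con membresía (114 934 paseos), y son los únicos que registran paseos en bicicletas de tipo *docked_bike* (57 698 paseos).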

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=322df7a5-4b18-4ddb-8f91-c142e3bf7671' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>