In [1]:
import pandas as pd
import seaborn as sns
import cufflinks as cf
from IPython.display import display,HTML
cf.set_config_file(sharing='public',theme='polar',offline=True) # write cf.getThemes() to find all themes available

In [2]:
cf.getThemes()

['ggplot', 'pearl', 'solar', 'space', 'white', 'polar', 'henanigans']

In [3]:
df = pd.read_csv("UberDataset.csv")

In [4]:
df.head()

Unnamed: 0,START_DATE,END_DATE,CATEGORY,START,STOP,MILES,PURPOSE
0,01-01-2016 21:11,01-01-2016 21:17,Business,Fort Pierce,Fort Pierce,5.1,Meal/Entertain
1,01-02-2016 01:25,01-02-2016 01:37,Business,Fort Pierce,Fort Pierce,5.0,
2,01-02-2016 20:25,01-02-2016 20:38,Business,Fort Pierce,Fort Pierce,4.8,Errand/Supplies
3,01-05-2016 17:31,01-05-2016 17:45,Business,Fort Pierce,Fort Pierce,4.7,Meeting
4,01-06-2016 14:42,01-06-2016 15:49,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit


In [5]:
# Convertir la columna START_DATE al formato de fecha
df['START_DATE'] = pd.to_datetime(df['START_DATE'], errors='coerce')

# Extraer solo la fecha del mes
df['YEAR'] = df['START_DATE'].dt.to_period('Y')

# Mostrar el resultado
print(df[['START_DATE', 'YEAR']])


              START_DATE  YEAR
0    2016-01-01 21:11:00  2016
1    2016-01-02 01:25:00  2016
2    2016-01-02 20:25:00  2016
3    2016-01-05 17:31:00  2016
4    2016-01-06 14:42:00  2016
...                  ...   ...
1151                 NaT   NaT
1152                 NaT   NaT
1153                 NaT   NaT
1154                 NaT   NaT
1155                 NaT   NaT

[1156 rows x 2 columns]


In [6]:
df["YEAR"].fillna(2016, inplace = True)


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '2016' has dtype incompatible with period[A-DEC], please explicitly cast to a compatible dtype first.



In [7]:
df.sample(5)

Unnamed: 0,START_DATE,END_DATE,CATEGORY,START,STOP,MILES,PURPOSE,YEAR
755,2016-09-12 08:07:00,09-12-2016 08:12,Business,Unknown Location,Unknown Location,3.6,,2016
338,NaT,4/29/2016 13:34,Business,Durham,Cary,10.0,Meeting,2016
682,NaT,8/19/2016 17:52,Business,Islamabad,Unknown Location,12.5,,2016
727,NaT,8/27/2016 19:13,Business,Unknown Location,Unknown Location,156.9,,2016
800,2016-10-09 14:04:00,10-09-2016 14:23,Business,Unknown Location,Unknown Location,7.7,Temporary Site,2016


In [8]:
df["CATEGORY"].value_counts()

CATEGORY
Business    1078
Personal      77
Name: count, dtype: int64

In [9]:
import plotly.express as px

# Supongamos que tu DataFrame se llama df

fig = px.bar(df, x="PURPOSE", color="CATEGORY", 
             title="Frecuencia sobre los propósitos de viajes en UBER",
             labels={"PURPOSE": "Propósito"},
             category_orders={"PURPOSE": ["Meeting", "Meal/Entertain", "Errand/Supplies", 
                                           "Customer Visit", "Temporary Site", "Between Offices"]},
             height=600, width=800,
             barmode="stack")

fig.show()

In [10]:
df["PURPOSE"].value_counts()

PURPOSE
Meeting            187
Meal/Entertain     160
Errand/Supplies    128
Customer Visit     101
Temporary Site      50
Between Offices     18
Moving               4
Airport/Travel       3
Charity ($)          1
Commute              1
Name: count, dtype: int64

In [11]:
df.dropna(inplace = True)

In [12]:
df["PURPOSE"].value_counts()

PURPOSE
Meeting            81
Meal/Entertain     64
Errand/Supplies    47
Customer Visit     46
Temporary Site     14
Between Offices     9
Name: count, dtype: int64

In [13]:
df["PURPOSE"].value_counts()

PURPOSE
Meeting            81
Meal/Entertain     64
Errand/Supplies    47
Customer Visit     46
Temporary Site     14
Between Offices     9
Name: count, dtype: int64

In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 261 entries, 0 to 1047
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   START_DATE  261 non-null    datetime64[ns]
 1   END_DATE    261 non-null    object        
 2   CATEGORY    261 non-null    object        
 3   START       261 non-null    object        
 4   STOP        261 non-null    object        
 5   MILES       261 non-null    float64       
 6   PURPOSE     261 non-null    object        
 7   YEAR        261 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(6)
memory usage: 18.4+ KB


In [15]:
df.groupby("PURPOSE")["MILES"].agg(["mean", "std"])

Unnamed: 0_level_0,mean,std
PURPOSE,Unnamed: 1_level_1,Unnamed: 2_level_1
Between Offices,10.933333,4.803384
Customer Visit,18.63913,29.948893
Errand/Supplies,4.121277,4.037322
Meal/Entertain,4.9125,2.939037
Meeting,13.822222,22.483277
Temporary Site,7.757143,3.184509


In [16]:
purpose = df["PURPOSE"].value_counts()

In [17]:
purpose.iplot(kind = "bar", 
              xTitle = "Propósito", yTitle = "Frecuencia en miles", 
              title = "Frecuencia sobre los propósitos de viajes en UBER", asFigure = True)



In [18]:
import plotly.express as px

# Supongamos que tu DataFrame se llama df

fig = px.bar(df, x="PURPOSE", color="CATEGORY", 
             title="Frecuencia sobre los propósitos de viajes en UBER",
             labels={"PURPOSE": "Propósito", "COUNT": "Frecuencia en miles"},
             category_orders={"PURPOSE": ["Meeting", "Meal/Entertain", "Errand/Supplies", 
                                           "Customer Visit", "Temporary Site", "Between Offices"]},
             height=600, width=800,
             barmode="stack")

fig.show()



In [19]:
import plotly.express as px

# Supongamos que tu DataFrame se llama df

fig = px.bar(df, x="PURPOSE", color="CATEGORY", 
             title="Frecuencia sobre los propósitos de viajes en UBER",
             labels={"PURPOSE": "Frecuencia"})

fig.show()


In [20]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Suponiendo que ya tienes la columna 'PURPOSE' en tu DataFrame
purpose_counts = df["PURPOSE"].value_counts()

# Crear un objeto de subgráficos
fig = make_subplots(rows=1, cols=1)

# Agregar la serie de barras apiladas al objeto de la figura
fig.add_trace(go.Bar(
    x=purpose_counts.index,
    y=purpose_counts.values,
    name='Frecuencia',
), row=1, col=1)

# Actualizar el diseño del gráfico
fig.update_layout(
    barmode='stack',  # Establecer el modo de apilamiento a "stack"
    xaxis=dict(title='Propósito'),
    yaxis=dict(title='Frecuencia'),
    title='Frecuencia sobre los propósitos de viajes en UBER'
)

# Mostrar el gráfico
fig.show()



In [21]:
import plotly.express as px
import pandas as pd

# Crear un DataFrame de ejemplo
data = {
    'Categoria': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Valor': [10, 15, 8, 12, 9, 6, 14, 11, 7],
    'Tiempo': ['T1', 'T1', 'T1', 'T2', 'T2', 'T2', 'T3', 'T3', 'T3']
}

df = pd.DataFrame(data)

# Crear un gráfico de barras apiladas con gráficos dinámicos
fig = px.bar(df, x='Tiempo', y='Valor', color='Categoria', 
             title='Gráfico de Barras Apiladas con Gráficos Dinámicos',
             labels={'Valor': 'Frecuencia'})

# Mostrar el gráfico
fig.show()


In [22]:
df["YEAR"] = pd.to_numeric(df["YEAR"], errors="coerce")

KeyError: 'YEAR'

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1156 entries, 0 to 1155
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   START_DATE  421 non-null    datetime64[ns]
 1   END_DATE    1155 non-null   object        
 2   CATEGORY    1155 non-null   object        
 3   START       1155 non-null   object        
 4   STOP        1155 non-null   object        
 5   MILES       1156 non-null   float64       
 6   PURPOSE     1156 non-null   object        
 7   YEAR        735 non-null    float64       
dtypes: datetime64[ns](1), float64(2), object(5)
memory usage: 72.4+ KB


In [None]:
df_nuevo = df["PURPOSE"]

In [None]:
import plotly.express as px

In [None]:
# Selecciona las columnas específicas que deseas utilizar
selected_columns = ['MILES', 'PURPOSE']
df_selected = df[selected_columns]

# Crea el gráfico de araña utilizando Plotly Express
fig = px.line_polar(df_selected, r='MILES', theta='PURPOSE', line_close=True, title='Viajes en 2023')

# Configura el modo de interacción al pasar el ratón
fig.update_layout(hovermode='x')

# Muestra el gráfico
fig.show()

In [None]:
df.iplot(kind = "line")