# Importing Libraries 

In [1]:
# Data handling libraries used throughout the notebook.
import pandas as pd
import numpy as np
# Plotly's offline module, used below to initialise notebook rendering.
import plotly.offline as pyo
# Initialise Plotly's notebook mode for this kernel session (connected=True is
# passed here). NOTE(review): the figures below are only written to HTML files,
# so confirm whether inline rendering is still needed.
pyo.init_notebook_mode(connected=True)

# Reading Excel Sheet

In [2]:
# Load the taxi trip records from the workbook in the working directory.
df = pd.read_excel('taxi_data.xlsx')

In [3]:
# Preview the first ten rows to check the columns that were loaded.
df.head(n=10)

Unnamed: 0,pickup_time,dropoff_time,trip_distance,pickup_longitude,pickup_latitude,rate_code,dropoff_longitude,dropoff_latitude,payment_type,trip_fare,tip_amount,toll_amount
0,2023-03-01 00:00:45,2023-03-01 00:39:48,18.34,-73.793167,40.65683,1,-73.793167,40.65683,2,58.34,0.0,0.0
1,2023-03-04 07:07:32,2023-03-04 07:23:35,4.84,-73.990936,40.750488,1,-74.034897,40.727131,1,50.0,12.7,0.0
2,2023-03-04 07:09:59,2023-03-04 07:36:24,11.43,-73.876129,40.771793,1,-73.972145,40.847412,1,90.0,8.0,18.04
3,2023-03-04 07:26:30,2023-03-04 07:45:42,10.78,-73.849518,40.748043,1,-73.700356,40.75808,1,84.95,17.05,0.0
4,2023-03-04 07:26:39,2023-03-04 07:27:05,0.04,-73.777267,40.644588,1,-73.776688,40.644798,1,58.0,11.76,0.0
5,2023-03-04 07:31:00,2023-03-04 09:09:45,0.0,-73.776833,40.646111,1,-73.861954,40.768581,1,45.0,9.1,0.0
6,2023-03-04 07:37:06,2023-03-04 07:37:10,0.0,-73.933357,40.798641,1,-73.933365,40.798645,1,16.8,3.0,0.0
7,2023-03-04 07:39:41,2023-03-04 07:39:43,0.0,0.0,0.0,1,-73.973953,40.791351,1,59.5,12.06,0.0
8,2023-03-04 07:40:55,2023-03-04 08:39:06,44.49,-74.017181,40.708397,1,-74.640816,40.574982,1,225.0,47.56,12.5
9,2023-03-04 07:41:47,2023-03-04 07:58:55,5.03,-73.993637,40.746593,1,-74.034325,40.716373,1,52.0,12.96,12.5


# HTML IMAGE VISUALIZATION

## Payment Distribution (payment_distribution.html)

In [4]:
import plotly.express as px

# Count of trips per payment type. The preview of the data shows payment_type
# holding small integer codes (1 and 2), so each code is treated as a category.
payment_distribution = px.histogram(
    df,
    x='payment_type',
    title='Payment Distribution',
    labels={'payment_type': 'Payment Type'},
    color_discrete_sequence=['skyblue'],  # Single colour for all bars
)

# The count axis of a px.histogram is titled "count" and a 'count' entry in
# `labels` does not rename it, so the title is set on the layout directly.
payment_distribution.update_layout(yaxis_title='Frequency')

# Show one bar per payment code (in ascending code order) instead of binning
# the codes as if they were a continuous numeric variable.
payment_distribution.update_xaxes(type='category', categoryorder='category ascending')

# Save the plot as a standalone HTML file
payment_distribution.write_html('payment_distribution.html')

## Trip Distance Vs Fare (trip_distance_vs_fare.html)

In [5]:
import plotly.express as px

# Scatter of every trip: distance on x, fare on y.
scatter_plot = px.scatter(
    df,
    x='trip_distance',
    y='trip_fare',
    title='Trip Distance vs. Trip Fare',
    labels={'trip_distance': 'Trip Distance', 'trip_fare': 'Trip Fare'},
    hover_name='trip_fare',
    color_discrete_sequence=['blue'],  # Colour for the full data set
)
# Name the base trace so the legend can tell it apart from the outliers.
scatter_plot.update_traces(name='All trips', showlegend=True)

# Outliers: short trips with a high fare (distance < 5 and fare > 50) or
# long trips with a low fare (distance > 20 and fare < 20).
outliers = df[
    ((df['trip_distance'] < 5) & (df['trip_fare'] > 50)) |
    ((df['trip_distance'] > 20) & (df['trip_fare'] < 20))
]

# Overlay the outliers in red. Skipped when nothing matches, so that indexing
# `.data[0]` can never fail on an empty selection.
if not outliers.empty:
    outlier_trace = px.scatter(
        outliers,
        x='trip_distance',
        y='trip_fare',
        color_discrete_sequence=['red'],  # Colour for outliers
        hover_name='trip_fare',
    ).data[0]
    outlier_trace.update(name='Outliers', showlegend=True)
    scatter_plot.add_trace(outlier_trace)

# Make sure the legend explaining the two colours is displayed.
scatter_plot.update_layout(showlegend=True)

# Save the plot as a standalone HTML file
scatter_plot.write_html('trip_distance_vs_fare.html')

## Tip Distribution Vs Distance (tip_distribution_vs_distance.html)

In [6]:
import plotly.express as px

# Scatter of every trip: distance on x, tip amount on y.
tip_distribution = px.scatter(
    df,
    x='trip_distance',
    y='tip_amount',
    title='Trip Distance vs. Tip Amount',
    labels={'trip_distance': 'Trip Distance', 'tip_amount': 'Tip Amount'},
    hover_name='tip_amount',
    color_discrete_sequence=['blue'],  # Colour for the full data set
)
# Name the base trace so the legend can tell it apart from the outliers.
tip_distribution.update_traces(name='All trips', showlegend=True)

# Outliers: short trips with a high tip (distance < 5 and tip > 5) or
# long trips with a low tip (distance > 20 and tip < 2).
outliers = df[
    ((df['trip_distance'] < 5) & (df['tip_amount'] > 5)) |
    ((df['trip_distance'] > 20) & (df['tip_amount'] < 2))
]

# Overlay the outliers in red. Skipped when nothing matches, so that indexing
# `.data[0]` can never fail on an empty selection.
if not outliers.empty:
    outlier_trace = px.scatter(
        outliers,
        x='trip_distance',
        y='tip_amount',
        color_discrete_sequence=['red'],  # Colour for outliers
        hover_name='tip_amount',
    ).data[0]
    outlier_trace.update(name='Outliers', showlegend=True)
    tip_distribution.add_trace(outlier_trace)

# Make sure the legend explaining the two colours is displayed.
tip_distribution.update_layout(showlegend=True)

# Save the plot as a standalone HTML file
tip_distribution.write_html('tip_distribution_vs_distance.html')

## Pickup_hour Distribution (pickup_hour_distribution.html)

In [7]:
import pandas as pd
import plotly.express as px

# Parse pickup_time to datetimes (harmless if the column is already datetime)
# and derive the hour of day of each pickup. Both columns are stored on `df`.
df['pickup_time'] = pd.to_datetime(df['pickup_time'])
df['pickup_hour'] = df['pickup_time'].dt.hour

# Hours actually present in the data, ascending, so colours and legend entries
# follow the clock rather than the order in which hours first appear.
hour_order = sorted(df['pickup_hour'].dropna().unique().tolist())

# Histogram of pickups per hour, one colour group per hour.
pickup_hour_distribution = px.histogram(
    df,
    x='pickup_hour',
    title='Pickup Hour Distribution',
    labels={'pickup_hour': 'Pickup Hour'},
    color='pickup_hour',
    category_orders={'pickup_hour': hour_order},
    color_discrete_sequence=px.colors.sequential.Viridis,  # Colour sequence for the hours
)

# One bin per whole hour, centred on the integer hour values 0-23, instead of
# leaving the bin width to automatic binning.
pickup_hour_distribution.update_traces(xbins=dict(start=-0.5, end=23.5, size=1))

# The count axis of a px.histogram is titled "count" and a 'count' entry in
# `labels` does not rename it, so the title is set on the layout directly.
pickup_hour_distribution.update_layout(yaxis_title='Frequency')

# Save the plot as a standalone HTML file
pickup_hour_distribution.write_html('pickup_hour_distribution.html')

## Rate Card Distribution (rate_card_distribution.html)

In [8]:
import plotly.express as px

# A route is the pair ((pickup_longitude, pickup_latitude),
# (dropoff_longitude, dropoff_latitude)). Built by zipping the columns rather
# than a row-wise apply; the stored value is still a tuple of two tuples.
pickup_points = zip(df['pickup_longitude'], df['pickup_latitude'])
dropoff_points = zip(df['dropoff_longitude'], df['dropoff_latitude'])
df['route'] = list(zip(pickup_points, dropoff_points))

# Average fare for each distinct route.
rate_card_df = df.groupby('route')['trip_fare'].mean().reset_index()

# Nested tuples are not a usable axis value for Plotly (they serialise as
# nested arrays), so each route gets a plain-text label for plotting.
rate_card_df['route_label'] = [
    f'({pickup[0]}, {pickup[1]}) -> ({dropoff[0]}, {dropoff[1]})'
    for pickup, dropoff in rate_card_df['route']
]

# Scatter of average fare per route; the route label doubles as hover text.
rate_card_distribution = px.scatter(
    rate_card_df,
    x='route_label',
    y='trip_fare',
    title='Rate Card Distribution',
    labels={'trip_fare': 'Average Fare', 'route_label': 'Route'},
)

# Save the plot as a standalone HTML file
rate_card_distribution.write_html('rate_card_distribution.html')

## Time Series Distribution (time_series_distribution.html)

In [9]:
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of trip_fare into trend, seasonal and residual parts.
# The series is decomposed in row order with a period of 30 observations.
# NOTE(review): this assumes the rows of `df` are in chronological order and
# that 30 trips is a meaningful cycle length - confirm both.
decomposition_result = seasonal_decompose(df['trip_fare'], model='additive', period=30)

# Frame used for plotting. 'Date' holds the pickup timestamp of each trip
# (previously it held the integer row index, which is not a date).
plot_data = pd.DataFrame({
    'Date': pd.to_datetime(df['pickup_time']),
    'Trend': decomposition_result.trend,
    'Seasonality': decomposition_result.seasonal,
    'Residuals': decomposition_result.resid,
    'Original': df['trip_fare']
})

component_names = ['Trend', 'Seasonality', 'Residuals', 'Original']

# One line per component, all sharing the same time axis.
fig = go.Figure()
for component in component_names:
    fig.add_trace(go.Scatter(x=plot_data['Date'], y=plot_data[component],
                             mode='lines', name=component))

# Hover box listing every component value at the hovered observation.
hover_template = '<b>Date:</b> %{x}<br>' \
                 '<b>Value:</b> %{y}<br>' \
                 '<b>Trend:</b> %{customdata[0]:.2f}<br>' \
                 '<b>Seasonality:</b> %{customdata[1]:.2f}<br>' \
                 '<b>Residuals:</b> %{customdata[2]:.2f}<br>' \
                 '<b>Original:</b> %{customdata[3]:.2f}'

# customdata columns follow the order of component_names, matching the
# customdata[0..3] indices referenced in the template above.
fig.update_traces(customdata=plot_data[component_names].to_numpy(),
                  hovertemplate=hover_template)

# Update layout
fig.update_layout(title='Time Series Decomposition with Pointers',
                  xaxis_title='Date',
                  yaxis_title='Value')

# Save the plot as a standalone HTML file
fig.write_html('time_series_distribution.html')

## Day Wise Distribution (day_wise_distribution.html)

In [10]:
import plotly.express as px

# Frame used for plotting, built from `decomposition_result` computed in the
# time-series decomposition cell. 'Date' holds the pickup timestamp of each
# trip (previously the integer row index), so the date tick format set below
# is applied to a real date axis.
plot_data = pd.DataFrame({
    'Date': pd.to_datetime(df['pickup_time']),
    'Trend': decomposition_result.trend,
    'Seasonality': decomposition_result.seasonal,
    'Residuals': decomposition_result.resid,
    'Original': df['trip_fare']
})

# Scatter of the original fare over time; the hover box also lists the
# decomposition components for the hovered trip.
fig = px.scatter(plot_data, x='Date', y='Original', title='Time Series Decomposition with Cursor Details',
                 labels={'Original': 'Trip Fare'}, hover_data=['Trend', 'Seasonality', 'Residuals', 'Original'])

# Axis titles, with x ticks rendered as calendar dates.
fig.update_layout(xaxis_title='Date', yaxis_title='Value', xaxis=dict(tickformat='%Y-%m-%d'))

# Save the plot as a standalone HTML file
fig.write_html('day_wise_distribution.html')