In [1]:
# importing the required libraries
import numpy as np
import pandas as pd
import plotly.express as px
import dash
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the user-base CSV; the path is resolved relative to the working directory
DATA_PATH = 'NioPracticeUserbase.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five records to sanity-check the load
df.head(n=5)

Unnamed: 0,User ID,Subscription Type,Monthly Revenue,Join Date,Last Payment Date,Country,Age,Gender,Device,Plan Duration
0,1,Basic,10,15-01-22,10-06-23,United States,28,Male,Smartphone,1 Month
1,2,Premium,15,05-09-21,22-06-23,Canada,35,Female,Tablet,1 Month
2,3,Standard,12,28-02-23,27-06-23,United Kingdom,42,Male,Smart TV,1 Month
3,4,Standard,12,10-07-22,26-06-23,Australia,51,Female,Laptop,1 Month
4,5,Basic,10,01-05-23,28-06-23,Germany,33,Male,Smartphone,1 Month


In [4]:
# Preview the final five records
df.tail(n=5)

Unnamed: 0,User ID,Subscription Type,Monthly Revenue,Join Date,Last Payment Date,Country,Age,Gender,Device,Plan Duration
2495,2496,Premium,14,25-07-22,12-07-23,Spain,28,Female,Smart TV,1 Month
2496,2497,Basic,15,04-08-22,14-07-23,Spain,33,Female,Smart TV,1 Month
2497,2498,Standard,12,09-08-22,15-07-23,United States,38,Male,Laptop,1 Month
2498,2499,Standard,13,12-08-22,12-07-23,Canada,48,Female,Tablet,1 Month
2499,2500,Basic,15,13-08-22,12-07-23,United States,35,Female,Smart TV,1 Month


In [5]:
# Number of non-null entries in each column
df.notna().sum()

User ID              2500
Subscription Type    2500
Monthly Revenue      2500
Join Date            2500
Last Payment Date    2500
Country              2500
Age                  2500
Gender               2500
Device               2500
Plan Duration        2500
dtype: int64

Every column in this dataset contains 2500 non-null values.

In [6]:
# Dimensions of the frame as (rows, columns)
n_rows, n_cols = df.shape
(n_rows, n_cols)

(2500, 10)

This dataset has 2500 rows and 10 columns.

In [7]:
# List the column labels of the dataset
df.columns

Index(['User ID', 'Subscription Type', 'Monthly Revenue', 'Join Date',
       'Last Payment Date', 'Country', 'Age', 'Gender', 'Device',
       'Plan Duration'],
      dtype='object')

In [8]:
# Summarize the frame: index range, per-column non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2500 entries, 0 to 2499
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   User ID            2500 non-null   int64 
 1   Subscription Type  2500 non-null   object
 2   Monthly Revenue    2500 non-null   int64 
 3   Join Date          2500 non-null   object
 4   Last Payment Date  2500 non-null   object
 5   Country            2500 non-null   object
 6   Age                2500 non-null   int64 
 7   Gender             2500 non-null   object
 8   Device             2500 non-null   object
 9   Plan Duration      2500 non-null   object
dtypes: int64(3), object(7)
memory usage: 195.4+ KB


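All 10 columns contain 2500 non-null values. Three columns are numeric (int64) and seven have the object dtype: five categorical text columns plus the two date columns (Join Date and Last Payment Date), which are stored as strings.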

In [9]:
# Count the missing entries in each column
df.isna().sum()

User ID              0
Subscription Type    0
Monthly Revenue      0
Join Date            0
Last Payment Date    0
Country              0
Age                  0
Gender               0
Device               0
Plan Duration        0
dtype: int64

There are no missing values in the dataset.

In [10]:
# Count rows that repeat an earlier row across every column
df.duplicated(keep='first').sum()

0

This dataset is completely free from any duplicate values.

In [11]:
# List the dtype pandas assigned to each column
df.dtypes

User ID               int64
Subscription Type    object
Monthly Revenue       int64
Join Date            object
Last Payment Date    object
Country              object
Age                   int64
Gender               object
Device               object
Plan Duration        object
dtype: object

In [12]:
# Display the frame; the rendered output elides the middle rows
df

Unnamed: 0,User ID,Subscription Type,Monthly Revenue,Join Date,Last Payment Date,Country,Age,Gender,Device,Plan Duration
0,1,Basic,10,15-01-22,10-06-23,United States,28,Male,Smartphone,1 Month
1,2,Premium,15,05-09-21,22-06-23,Canada,35,Female,Tablet,1 Month
2,3,Standard,12,28-02-23,27-06-23,United Kingdom,42,Male,Smart TV,1 Month
3,4,Standard,12,10-07-22,26-06-23,Australia,51,Female,Laptop,1 Month
4,5,Basic,10,01-05-23,28-06-23,Germany,33,Male,Smartphone,1 Month
...,...,...,...,...,...,...,...,...,...,...
2495,2496,Premium,14,25-07-22,12-07-23,Spain,28,Female,Smart TV,1 Month
2496,2497,Basic,15,04-08-22,14-07-23,Spain,33,Female,Smart TV,1 Month
2497,2498,Standard,12,09-08-22,15-07-23,United States,38,Male,Laptop,1 Month
2498,2499,Standard,13,12-08-22,12-07-23,Canada,48,Female,Tablet,1 Month


# User Demographics Visualization:
Create a bar chart showing the number of users by gender, with the ability to filter by country through a dropdown.

In [17]:
# Initialize the Dash app
app = Dash(__name__)

# Distinct country names as a plain Python list, so the dropdown's options and
# initial value are ordinary JSON-friendly values rather than a NumPy array.
country_names = df['Country'].unique().tolist()

# Dropdown for country filter
country_options = [{'label': country, 'value': country} for country in country_names]


def _make_gender_fig(selected_countries=None):
    """Build the "Number of Users by Gender" bar chart.

    Args:
        selected_countries: list of country names to include. ``None`` or an
            empty list means "no filter", so every country is counted (the
            same convention the subscription dashboard uses for an empty
            selection).

    Returns:
        A Plotly Express bar figure with one bar per gender.
    """
    if selected_countries:
        subset = df[df['Country'].isin(selected_countries)]
    else:
        # Cleared dropdown: fall back to the full data set instead of failing
        # on isin(None) or drawing an empty chart.
        subset = df

    # Aggregate first so each gender is a single bar whose height is the user
    # count, instead of handing Plotly one row per user.
    gender_counts = subset.groupby('Gender').size().reset_index(name='Users')

    return px.bar(gender_counts, x='Gender', y='Users', color='Gender',
                  title='Number of Users by Gender')


# User Demographics Visualization (initial, unfiltered figure)
gender_fig = _make_gender_fig()

app.layout = html.Div([
    html.H1('User Demographics Dashboard'),

    # Multi-select country filter; every country is selected initially
    dcc.Dropdown(
        id='country-dropdown',
        options=country_options,
        multi=True,
        value=country_names,
        style={'margin-bottom': '20px'}
    ),

    # Bar chart for Number of Users by Gender
    dcc.Graph(
        id='gender-bar',
        figure=gender_fig
    ),
])


# Callback to update the bar chart based on country filter
@app.callback(
    Output('gender-bar', 'figure'),
    [Input('country-dropdown', 'value')]
)
def update_gender_fig(selected_countries):
    """Redraw the gender bar chart for the countries chosen in the dropdown.

    Args:
        selected_countries: current value of the 'country-dropdown' component.

    Returns:
        The figure to show in the 'gender-bar' graph.
    """
    return _make_gender_fig(selected_countries)


if __name__ == '__main__':
    # NOTE(review): newer Dash releases expose this entry point as app.run();
    # confirm against the installed Dash version.
    app.run_server(debug=True)


This User Demographics dashboard shows the number of users by gender for each country. In the United States, male and female
user counts are almost equal; in Canada, Australia, France, Mexico and Italy, male users outnumber female users; and in the
United Kingdom, Germany, Brazil and Spain, female users outnumber male users.

# Subscription Overview:
Display the proportion of each subscription type using a pie chart, incorporating a slider or dropdown to filter data by plan duration.

In [22]:
# Build a fresh Dash application for the subscription overview
app = dash.Dash(__name__)


def _subscription_pie(frame):
    """Return the subscription-type proportion pie chart drawn from ``frame``."""
    return px.pie(frame, names='Subscription Type', title='Subscription Type Proportion')


# Initial pie chart covers every row of the data set
subscription_fig = _subscription_pie(df)

# One dropdown entry per distinct plan duration
plan_duration_options = [
    dict(label=duration, value=duration)
    for duration in df['Plan Duration'].unique()
]

# Multi-select filter on plan duration, with every duration preselected
duration_filter = dcc.Dropdown(
    id='plan-duration-dropdown',
    options=plan_duration_options,
    multi=True,
    value=df['Plan Duration'].unique(),
    style={'margin-bottom': '20px'},
)

# Graph component that hosts the pie chart
pie_graph = dcc.Graph(id='subscription-pie', figure=subscription_fig)

app.layout = html.Div([
    html.H1('Subscription Overview Dashboard'),
    duration_filter,
    pie_graph,
])


# Keep the pie chart in sync with the plan-duration filter
@app.callback(
    Output('subscription-pie', 'figure'),
    [Input('plan-duration-dropdown', 'value')]
)
def update_subscription_fig(selected_durations):
    """Redraw the pie chart for the plan durations chosen in the dropdown.

    Args:
        selected_durations: current value of the 'plan-duration-dropdown'
            component; a falsy value (nothing selected) means "show all".

    Returns:
        The figure to show in the 'subscription-pie' graph.
    """
    if selected_durations:
        # Keep only rows whose plan duration is among the selected ones
        chosen_rows = df['Plan Duration'].isin(selected_durations)
        return _subscription_pie(df[chosen_rows])

    # Nothing selected: show the overall proportions
    return _subscription_pie(df)


if __name__ == '__main__':
    app.run_server(debug=True)


The largest share of users, 40%, are on the Basic subscription, while the Standard and Premium shares are almost equal: 30.7% of users are on Standard and 29.3% are on Premium.

# Monthly Revenue Trend:
Visualize total monthly revenue over time with a line chart, including a checkbox or buttons to toggle the view between overall and segmented by subscription type revenue.

In [21]:
# Initialize the Dash app
app = dash.Dash(__name__)

# 'Join Date' is stored as day-month-year text (e.g. 15-01-22). Parse it and
# snap every date to the first day of its month so revenue can be totalled per
# month and drawn on a real, chronologically ordered time axis. The parsed
# column lives on a separate frame; df itself is left untouched.
join_month = (
    pd.to_datetime(df['Join Date'], format='%d-%m-%y')
    .dt.to_period('M')
    .dt.to_timestamp()
)
revenue_df = df.assign(**{'Join Month': join_month})

# Axis labels shared by both views
REVENUE_LABELS = {'Monthly Revenue': 'Total Revenue', 'Join Month': 'Join Month'}


def _make_revenue_fig(selected_views=None):
    """Build the monthly revenue line chart for the ticked views.

    NOTE(review): revenue is totalled by the month in which each user joined,
    mirroring the original choice of 'Join Date' for the x axis; confirm that
    this is the intended definition of "monthly revenue".

    Args:
        selected_views: list of checklist values ('Overall', 'Segmented').
            ``None`` or an empty list falls back to the overall view.

    Returns:
        A Plotly Express line figure:
        * only 'Overall' (or nothing) ticked: a single total-revenue line;
        * 'Segmented' ticked: one line per subscription type, plus an
          'Overall' total line when 'Overall' is ticked as well.
    """
    views = set(selected_views or [])
    show_segmented = 'Segmented' in views
    # With nothing ticked, default to the overall line rather than silently
    # switching to the segmented view.
    show_overall = 'Overall' in views or not show_segmented

    series = []
    if show_segmented:
        series.append(
            revenue_df
            .groupby(['Join Month', 'Subscription Type'], as_index=False)['Monthly Revenue']
            .sum()
        )
    if show_overall:
        series.append(
            revenue_df
            .groupby('Join Month', as_index=False)['Monthly Revenue']
            .sum()
            # Label the total so it can share the colour legend with the segments
            .assign(**{'Subscription Type': 'Overall'})
        )
    monthly = pd.concat(series, ignore_index=True)

    if show_segmented:
        return px.line(monthly, x='Join Month', y='Monthly Revenue', color='Subscription Type',
                       title='Monthly Revenue Over Time - Segmented by Subscription Type',
                       labels=REVENUE_LABELS)

    return px.line(monthly, x='Join Month', y='Monthly Revenue',
                   title='Total Monthly Revenue Over Time',
                   labels=REVENUE_LABELS)


# Line chart for Total Monthly Revenue over time (initial view: overall)
revenue_fig = _make_revenue_fig(['Overall'])

# Add checkboxes for segmented view and overall view
checkbox_options = [
    {'label': 'Overall', 'value': 'Overall'},
    {'label': 'Segmented by Subscription Type', 'value': 'Segmented'}
]

app.layout = html.Div([
    html.H1('Monthly Revenue Dashboard'),

    # Line chart for Total Monthly Revenue
    dcc.Graph(
        id='revenue-line',
        figure=revenue_fig
    ),

    # Checkboxes for view selection
    dcc.Checklist(
        id='view-checkbox',
        options=checkbox_options,
        value=['Overall'],
        labelStyle={'display': 'inline-block'}
    ),
])


# Callback to update the line chart based on checkbox selections
@app.callback(
    Output('revenue-line', 'figure'),
    [Input('view-checkbox', 'value')]
)
def update_revenue_fig(selected_views):
    """Redraw the revenue chart for the views ticked in the checklist.

    Args:
        selected_views: current value of the 'view-checkbox' component.

    Returns:
        The figure to show in the 'revenue-line' graph.
    """
    return _make_revenue_fig(selected_views)


if __name__ == '__main__':
    # NOTE(review): newer Dash releases expose this entry point as app.run();
    # confirm against the installed Dash version.
    app.run_server(debug=True)
