## Next Entertainment Survey

Author: Raghad Alotaibi

# 1. Import libraries

In [1]:
import numpy as np
import pandas as pd

import plotly as py
import plotly.graph_objs as go


import matplotlib.pyplot as plt
import plotly.express as px


# 2. Read the data

In [2]:
# Shareable Google Drive link to the survey CSV.
url = 'https://drive.google.com/file/d/1q-jmmJjwssaug5yq3jcojUEz9BI1u__h/view?usp=sharing'
# The file id is the second-to-last path segment of the share link;
# it is passed as the `id` query parameter of the uc?export=download URL.
file_id = url.split('/')[-2]
path = f'https://drive.google.com/uc?export=download&id={file_id}'
df = pd.read_csv(path)

In [3]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

Unnamed: 0,ID,q1,q2,q3,q4,q5,q6,q7,q8,q9
0,1,2,4,1,4,1,3,1,1,Null
1,2,2,4,1,4,2,2,3,4,4
2,3,1,3,1,2,3,5,1,3,1
3,4,2,4,3,4,1,4,3,2,3
4,5,2,2,2,4,4,3,1,3,3


In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(500, 10)

In [5]:
# Per-column dtypes; an `object` column indicates non-numeric entries.
df.dtypes

ID     int64
q1     int64
q2     int64
q3     int64
q4     int64
q5     int64
q6     int64
q7     int64
q8     int64
q9    object
dtype: object

# 3. Exploratory Data Analysis  (EDA) & Data Preprocessing

In [6]:
# Remove the ID column before the analysis.
df = df.drop(columns=["ID"])

In [7]:
# Map the questionnaire column codes (q1..q9) to descriptive names.
column_names = {
    "q1": 'Gender',
    "q2": 'AgeGroup',
    "q3": 'MaritalStatus',
    "q4": 'MonthlyIncomeLevel',
    "q5": 'ResidentialArea',
    "q6": 'EventsSatisfaction',
    "q7": 'WeeklyVisitFrequency',
    "q8": 'Paywilingness',
    "q9": 'MoneySpend',
}
# rename returns a new frame by default, so the result is re-bound to df.
df = df.rename(columns=column_names)
                        

In [8]:
# Confirm that the descriptive column names were applied.
df.head(n=5)

Unnamed: 0,Gender,AgeGroup,MaritalStatus,MonthlyIncomeLevel,ResidentialArea,EventsSatisfaction,WeeklyVisitFrequency,Paywilingness,MoneySpend
0,2,4,1,4,1,3,1,1,Null
1,2,4,1,4,2,2,3,4,4
2,1,3,1,2,3,5,1,3,1
3,2,4,3,4,1,4,3,2,3
4,2,2,2,4,4,3,1,3,3


In [9]:
# Turn the textual 'Null' placeholder into real missing values.
# regex=True makes 'Null' a pattern rather than an exact-match value.
df = df.replace(to_replace='Null', value=np.nan, regex=True)

In [10]:
# Check that the 'Null' placeholder now shows up as a missing value.
df.head(n=5)

Unnamed: 0,Gender,AgeGroup,MaritalStatus,MonthlyIncomeLevel,ResidentialArea,EventsSatisfaction,WeeklyVisitFrequency,Paywilingness,MoneySpend
0,2,4,1,4,1,3,1,1,
1,2,4,1,4,2,2,3,4,4.0
2,1,3,1,2,3,5,1,3,1.0
3,2,4,3,4,1,4,3,2,3.0
4,2,2,2,4,4,3,1,3,3.0


In [11]:
# Column overview: non-null counts and dtypes after the 'Null' replacement.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Gender                500 non-null    int64 
 1   AgeGroup              500 non-null    int64 
 2   MaritalStatus         500 non-null    int64 
 3   MonthlyIncomeLevel    500 non-null    int64 
 4   ResidentialArea       500 non-null    int64 
 5   EventsSatisfaction    500 non-null    int64 
 6   WeeklyVisitFrequency  500 non-null    int64 
 7   Paywilingness         500 non-null    int64 
 8   MoneySpend            277 non-null    object
dtypes: int64(8), object(1)
memory usage: 35.3+ KB


In [12]:
# Number of missing values in each column.
df.isna().sum()

Gender                    0
AgeGroup                  0
MaritalStatus             0
MonthlyIncomeLevel        0
ResidentialArea           0
EventsSatisfaction        0
WeeklyVisitFrequency      0
Paywilingness             0
MoneySpend              223
dtype: int64

In [13]:
# Work on an independent copy so relabeling for the charts leaves the coded `df` untouched.
df_viz = df.copy(deep=True)

In [14]:
# Display the copy before relabeling (values are still numeric codes).
df_viz

Unnamed: 0,Gender,AgeGroup,MaritalStatus,MonthlyIncomeLevel,ResidentialArea,EventsSatisfaction,WeeklyVisitFrequency,Paywilingness,MoneySpend
0,2,4,1,4,1,3,1,1,
1,2,4,1,4,2,2,3,4,4
2,1,3,1,2,3,5,1,3,1
3,2,4,3,4,1,4,3,2,3
4,2,2,2,4,4,3,1,3,3
...,...,...,...,...,...,...,...,...,...
495,2,3,2,3,4,4,2,1,
496,1,1,3,1,4,5,1,1,
497,1,1,2,4,2,2,2,4,3
498,1,4,3,3,2,1,3,4,4


## Relabeling column values for easier interpretation of the visualizations

In [15]:

# Decode the Gender codes into readable labels.
gender_labels = {1: 'Male', 2: 'Female'}
df_viz['Gender'] = df_viz['Gender'].replace(gender_labels)

In [16]:
# Decode the AgeGroup codes into age-range labels.
age_labels = {1: '18-24', 2: '25-30', 3: '31-45', 4: '46-55', 5: '56+'}
df_viz['AgeGroup'] = df_viz['AgeGroup'].replace(age_labels)

In [17]:
# Decode the MaritalStatus codes into readable labels.
marital_labels = {
    1: 'Single',
    2: 'Married with no kids',
    3: 'Married with kids',
    4: 'Single with kids',
}
df_viz['MaritalStatus'] = df_viz['MaritalStatus'].replace(marital_labels)

In [18]:
# Decode the MonthlyIncomeLevel codes into income-bracket labels (lowest to highest).
income_labels = {
    1: '<7,000 SAR',
    2: '7,000-12,000SAR',
    3: '13,000-20,000SAR',
    4: '>20,000 SAR',
}
df_viz['MonthlyIncomeLevel'] = df_viz['MonthlyIncomeLevel'].replace(income_labels)

In [19]:
# Decode the ResidentialArea codes into area labels.
# NOTE: the 'Eest' spelling is matched verbatim by the north-area filter later
# in the notebook; if it is corrected here, correct it there as well.
area_labels = {
    1: 'North-West of Riyadh',
    2: 'North-Eest of Riyadh',
    3: 'South-West of Riyadh',
    4: 'South-Eest of Riyadh',
}
df_viz['ResidentialArea'] = df_viz['ResidentialArea'].replace(area_labels)

In [20]:
# Decode the WeeklyVisitFrequency codes into readable labels.
visit_labels = {
    1: 'I only go out on weekends',
    2: '2-3 times a week',
    3: '3+ times a week',
}
df_viz['WeeklyVisitFrequency'] = df_viz['WeeklyVisitFrequency'].replace(visit_labels)

In [21]:
# Decode the Paywilingness codes into readable labels.
# NOTE(review): only codes 1-3 are mapped. The data also contains code 4,
# which stays as the raw integer and is filtered out later with `!= 4`.
# Confirm the survey wording for code 4 before adding a label.
pay_labels = {
    1: 'I only attend free events',
    2: 'I will attend if the ticket is priced reasonably',
    3: 'I will attend if the event is exciting enough',
}
df_viz['Paywilingness'] = df_viz['Paywilingness'].replace(pay_labels)

In [22]:
# Decode the MoneySpend codes into spending-bracket labels.
# The keys are strings because this column was read as text (it contained 'Null').
spend_labels = {
    '1': '<50 SAR',
    '2': '50-100 SAR',
    '3': '101-200 SAR',
    '4': '200+ SAR',
}
df_viz['MoneySpend'] = df_viz['MoneySpend'].replace(spend_labels)

In [23]:
# Display the relabeled frame to verify the decoded values.
df_viz

Unnamed: 0,Gender,AgeGroup,MaritalStatus,MonthlyIncomeLevel,ResidentialArea,EventsSatisfaction,WeeklyVisitFrequency,Paywilingness,MoneySpend
0,Female,46-55,Single,">20,000 SAR",North-West of Riyadh,3,I only go out on weekends,I only attend free events,
1,Female,46-55,Single,">20,000 SAR",North-Eest of Riyadh,2,3+ times a week,4,200+ SAR
2,Male,31-45,Single,"7,000-12,000SAR",South-West of Riyadh,5,I only go out on weekends,I will attend if the event is exciting enough,<50 SAR
3,Female,46-55,Married with kids,">20,000 SAR",North-West of Riyadh,4,3+ times a week,I will attend if the ticket is priced reasonably,101-200 SAR
4,Female,25-30,Married with no kids,">20,000 SAR",South-Eest of Riyadh,3,I only go out on weekends,I will attend if the event is exciting enough,101-200 SAR
...,...,...,...,...,...,...,...,...,...
495,Female,31-45,Married with no kids,"13,000-20,000SAR",South-Eest of Riyadh,4,2-3 times a week,I only attend free events,
496,Male,18-24,Married with kids,"<7,000 SAR",South-Eest of Riyadh,5,I only go out on weekends,I only attend free events,
497,Male,18-24,Married with no kids,">20,000 SAR",North-Eest of Riyadh,2,2-3 times a week,4,101-200 SAR
498,Male,46-55,Married with kids,"13,000-20,000SAR",North-Eest of Riyadh,1,3+ times a week,4,200+ SAR


In [24]:

# Share of respondents per gender, as fractions.
df_viz['Gender'].value_counts(normalize=True)

Female    0.554
Male      0.446
Name: Gender, dtype: float64

# Takeaway 1

### Who are the residents of Riyadh City from a demographic perspective?

In [25]:
# Optional setup step, kept disabled: uncomment to pin plotly to 5.5.0.
# NOTE(review): presumably pinned because a later histogram passes `text_auto` — confirm the minimum plotly version.
#pip install plotly==5.5.0

Note: you may need to restart the kernel to use updated packages.


In [54]:
# Respondent counts per gender, with the count written on each bar.
fig = px.histogram(data_frame=df_viz, x="Gender", text_auto=True)
fig.show()

### Gender percentages 

In [27]:
# Share of respondents per gender, in percent.
df_viz['Gender'].value_counts(normalize=True).mul(100)

Female    55.4
Male      44.6
Name: Gender, dtype: float64

In [28]:
# Absolute number of respondents per gender.
df_viz['Gender'].value_counts()

Female    277
Male      223
Name: Gender, dtype: int64

In [29]:
# Respondent counts per age group; sorting the labels alphabetically
# also puts the age ranges in ascending order.
fig = px.histogram(data_frame=df_viz, x="AgeGroup")
fig.update_xaxes(categoryorder="category ascending")
fig.show()

In [30]:
# Share of respondents per age group, in percent.
df_viz['AgeGroup'].value_counts(normalize=True).mul(100)

56+      22.0
25-30    20.8
46-55    19.6
31-45    19.0
18-24    18.6
Name: AgeGroup, dtype: float64

In [31]:
# Absolute number of respondents per age group.
df_viz['AgeGroup'].value_counts()

56+      110
25-30    104
46-55     98
31-45     95
18-24     93
Name: AgeGroup, dtype: int64

In [32]:
# Marital status counts, with one bar per residential area side by side.
fig = px.histogram(
    data_frame=df_viz,
    x="MaritalStatus",
    color="ResidentialArea",
    barmode="group",
)
fig.show()

In [33]:
# Marital status counts, largest group first.
fig = px.histogram(data_frame=df_viz, x="MaritalStatus")
fig.update_xaxes(categoryorder="total descending")
fig.show()

In [34]:
# Share of respondents per marital status, in percent.
df_viz['MaritalStatus'].value_counts(normalize=True).mul(100)

Married with kids       28.2
Married with no kids    26.4
Single with kids        25.2
Single                  20.2
Name: MaritalStatus, dtype: float64

In [35]:
# Absolute number of respondents per marital status.
df_viz['MaritalStatus'].value_counts()

Married with kids       141
Married with no kids    132
Single with kids        126
Single                  101
Name: MaritalStatus, dtype: int64

# Takeaway 2

### Where should they host their next event?

In [36]:
# Respondent counts per residential area.
fig = px.histogram(data_frame=df_viz, x="ResidentialArea")
fig.show()

In [37]:
# Share of respondents per residential area, in percent.
df_viz['ResidentialArea'].value_counts(normalize=True).mul(100)

South-West of Riyadh    25.2
South-Eest of Riyadh    25.2
North-Eest of Riyadh    25.0
North-West of Riyadh    24.6
Name: ResidentialArea, dtype: float64

In [38]:
# Absolute number of respondents per residential area.
df_viz['ResidentialArea'].value_counts()

South-West of Riyadh    126
South-Eest of Riyadh    126
North-Eest of Riyadh    125
North-West of Riyadh    123
Name: ResidentialArea, dtype: int64

In [39]:
# Residential area counts, split by how often respondents go out per week.
fig = px.histogram(
    data_frame=df_viz,
    x="ResidentialArea",
    color="WeeklyVisitFrequency",
    barmode="group",
)
fig.show()

In [40]:
# Distribution of the EventsSatisfaction score, split by residential area.
fig = px.histogram(
    data_frame=df_viz,
    x="EventsSatisfaction",
    color="ResidentialArea",
    barmode="group",
)
fig.show()

In [41]:
# Subset of respondents living in the northern areas of Riyadh.
# The labels must equal the ResidentialArea values exactly, including the
# 'Eest' spelling produced by the relabeling step.
north_areas = ['North-West of Riyadh', 'North-Eest of Riyadh']
is_north = df_viz['ResidentialArea'].isin(north_areas)
df_viz_north = df_viz.loc[is_north]

In [42]:
# Marital status of northern respondents, split by area.
fig = px.histogram(
    data_frame=df_viz_north,
    x="MaritalStatus",
    color="ResidentialArea",
    barmode="group",
)
fig.show()

In [43]:
# Age groups of northern respondents, split by area; alphabetical label
# order matches ascending age.
fig = px.histogram(
    data_frame=df_viz_north,
    x="AgeGroup",
    color="ResidentialArea",
    barmode="group",
)
fig.update_xaxes(categoryorder="category ascending")
fig.show()

In [44]:
# Income brackets of northern respondents, split by area.
# The bracket labels do not sort by income when ordered alphabetically
# ('<', '>' and the digits compare as characters), so the x-axis order is
# given explicitly from the lowest to the highest bracket.
income_order = ['<7,000 SAR', '7,000-12,000SAR', '13,000-20,000SAR', '>20,000 SAR']
fig = px.histogram(df_viz_north, x="MonthlyIncomeLevel",
                   color="ResidentialArea", barmode='group')
fig.update_xaxes(categoryorder='array', categoryarray=income_order)
fig.show()

In [45]:
# Size of the northern subset as (rows, columns).
df_viz_north.shape

(248, 9)

# Takeaway 3

### How much should they charge for the entrance ticket?

In [46]:
# Income brackets across all respondents.
# The brackets are ordinal, so the x-axis is ordered explicitly from the
# lowest to the highest bracket instead of by first appearance in the data.
income_order = ['<7,000 SAR', '7,000-12,000SAR', '13,000-20,000SAR', '>20,000 SAR']
fig = px.histogram(df_viz, x='MonthlyIncomeLevel')
fig.update_xaxes(categoryorder='array', categoryarray=income_order)
fig.show()

In [47]:
# Income brackets among northern respondents only.
# The brackets are ordinal, so the x-axis is ordered explicitly from the
# lowest to the highest bracket instead of by first appearance in the data.
income_order = ['<7,000 SAR', '7,000-12,000SAR', '13,000-20,000SAR', '>20,000 SAR']
fig = px.histogram(df_viz_north, x='MonthlyIncomeLevel')
fig.update_xaxes(categoryorder='array', categoryarray=income_order)
fig.show()

In [48]:
# Treemap nested as Riyadh City -> residential area -> income bracket,
# coloured by income bracket.
hierarchy = [px.Constant("Riyadh City"), 'ResidentialArea', 'MonthlyIncomeLevel']
fig = px.treemap(df_viz, path=hierarchy, color='MonthlyIncomeLevel')
fig.update_layout(margin={'t': 50, 'l': 25, 'r': 25, 'b': 25})
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [49]:
# Northern respondents who reported a spending bracket.
# Only MoneySpend is checked for missing values, so a gap in any other
# column cannot silently remove a respondent from this subset.
df_viz_north_paying = df_viz_north.dropna(subset=["MoneySpend"])
# Paywilingness code 4 is not relabeled earlier in the notebook, so it is
# still the raw integer here; those rows are excluded.
df_viz_north_paying = df_viz_north_paying[df_viz_north_paying["Paywilingness"] != 4]

In [50]:
# Spending brackets of paying northern respondents, split by willingness to pay.
# The x-axis follows the bracket order from lowest to highest.
spend_order = ['<50 SAR', '50-100 SAR', '101-200 SAR', '200+ SAR']
fig = px.histogram(
    data_frame=df_viz_north_paying,
    x="MoneySpend",
    color="Paywilingness",
    barmode="group",
)
fig.update_layout(xaxis={'categoryorder': 'array', 'categoryarray': spend_order})
fig.show()

In [51]:
# Display the paying northern subset.
df_viz_north_paying

Unnamed: 0,Gender,AgeGroup,MaritalStatus,MonthlyIncomeLevel,ResidentialArea,EventsSatisfaction,WeeklyVisitFrequency,Paywilingness,MoneySpend
3,Female,46-55,Married with kids,">20,000 SAR",North-West of Riyadh,4,3+ times a week,I will attend if the ticket is priced reasonably,101-200 SAR
5,Female,56+,Married with kids,">20,000 SAR",North-West of Riyadh,5,3+ times a week,I will attend if the ticket is priced reasonably,50-100 SAR
6,Male,46-55,Married with no kids,"7,000-12,000SAR",North-Eest of Riyadh,4,I only go out on weekends,I will attend if the ticket is priced reasonably,50-100 SAR
12,Female,25-30,Married with no kids,"7,000-12,000SAR",North-Eest of Riyadh,5,I only go out on weekends,I will attend if the event is exciting enough,101-200 SAR
15,Female,31-45,Single,"7,000-12,000SAR",North-West of Riyadh,5,2-3 times a week,I will attend if the ticket is priced reasonably,<50 SAR
...,...,...,...,...,...,...,...,...,...
445,Female,56+,Married with no kids,"13,000-20,000SAR",North-West of Riyadh,5,I only go out on weekends,I will attend if the ticket is priced reasonably,200+ SAR
454,Female,46-55,Single,"7,000-12,000SAR",North-West of Riyadh,1,I only go out on weekends,I will attend if the ticket is priced reasonably,200+ SAR
455,Male,18-24,Single with kids,">20,000 SAR",North-Eest of Riyadh,1,I only go out on weekends,I will attend if the event is exciting enough,200+ SAR
486,Male,18-24,Married with kids,">20,000 SAR",North-Eest of Riyadh,5,3+ times a week,I will attend if the ticket is priced reasonably,50-100 SAR


In [52]:
# Spending brackets of paying northern respondents, split by income bracket.
# The x-axis uses the same lowest-to-highest bracket order as the other
# MoneySpend chart, instead of first appearance in the data.
spend_order = ['<50 SAR', '50-100 SAR', '101-200 SAR', '200+ SAR']
fig = px.histogram(df_viz_north_paying, x="MoneySpend",
                   color="MonthlyIncomeLevel", barmode='group')
fig.update_xaxes(categoryorder='array', categoryarray=spend_order)
fig.show()

In [53]:
# Willingness to pay among paying northern respondents, split by weekly outing frequency.
fig = px.histogram(
    data_frame=df_viz_north_paying,
    x="Paywilingness",
    color="WeeklyVisitFrequency",
    barmode="group",
)
fig.show()

# Your next event should be located in the north area of Riyadh City, include activities aimed at married females aged 25–30 with kids, and price the entrance ticket between 101 and 200 SAR to reach the largest group of interested customers.

