In [81]:
import pandas as pd
import numpy as np
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import random

In [82]:
# Load the user-profile table, using the first CSV column as the DataFrame index,
# then preview the first rows.
users = pd.read_csv(
    'profile_data.csv',
    index_col=[0],
)
users.head()

Unnamed: 0,City,Category,Age,Gender
0,Pondicherry,Natural,60,Male
1,Alappuzha,Architectural,18,Male
2,Amritsar,Religious,62,Female
3,Manali,Historical,23,Male
4,Panjim,Architectural,22,Female


In [83]:
# Print column dtypes and non-null counts for the loaded table.
users.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 65 entries, 0 to 64
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   City      65 non-null     object
 1   Category  65 non-null     object
 2   Age       65 non-null     int64 
 3   Gender    65 non-null     object
dtypes: int64(1), object(3)
memory usage: 2.5+ KB


In [84]:
# (rows, columns) of the loaded table.
users.shape

(65, 4)

## Checking Gender Diversity of Users

In [85]:
# Number of users per distinct value of the 'Gender' column.
users['Gender'].value_counts()

Female    37
Male      28
Name: Gender, dtype: int64

In [86]:
# Users per gender. The name `biology` is kept so that any later cell that
# still refers to it keeps working.
biology = users['Gender'].value_counts()

# Plotly Express is designed around tidy frames: one row per gender, with one
# column for the slice label and one for the slice size.
gender_counts = biology.rename_axis('Gender').reset_index(name='Users')

# `labels` in Plotly Express is a dict of column-name -> display-name overrides;
# the slice labels come from `names`. The pandas Index previously passed as
# `labels` was therefore dropped. Passing a DataFrame with column names instead
# of raw Index objects should also avoid the pandas multi-dimensional indexing
# deprecation warning this cell used to emit (NOTE(review): confirm on re-run).
fig = px.pie(gender_counts,
             names='Gender',
             values='Users',
             title='Percentage of Male vs. Female Users',
             hole=0.2)

# Show only the percentage inside each slice.
fig.update_traces(textposition='inside', textfont_size=15, textinfo='percent')
fig.show()


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



## Category Preference by Gender

In [87]:
# Keep only the female users, then count how many chose each category.
# The per-category count ends up in the 'Gender' column of the result.
women = users.loc[users['Gender'] == 'Female']
cat_women = (
    women
    .groupby('Category', as_index=False)
    .agg({'Gender': 'count'})
)
cat_women.head()

Unnamed: 0,Category,Gender
0,Architectural,7
1,Historical,12
2,Natural,6
3,Religious,12


In [88]:
# Donut chart of category choices among female users.
# `cat_women` has one row per category, with the user count stored in its
# 'Gender' column, so the frame and its column names are passed directly.
# `labels` must be a dict of column-name -> display-name overrides (the slice
# labels come from `names`); it is used here to give the count column a
# readable name in the hover text instead of being handed a Series.
fig = px.pie(cat_women,
             names='Category',
             values='Gender',
             labels={'Gender': 'Users'},
             title='Selection of Category for Females',
             hole=0.2)

fig.update_traces(textposition='inside', textfont_size=15)
fig.show()

In [89]:
# Keep only the male users, then count how many chose each category.
# The per-category count ends up in the 'Gender' column of the result.
men = users.loc[users['Gender'] == 'Male']
cat_men = (
    men
    .groupby('Category', as_index=False)
    .agg({'Gender': 'count'})
)
cat_men.head()

Unnamed: 0,Category,Gender
0,Architectural,3
1,Historical,10
2,Natural,6
3,Religious,9


In [90]:
# Donut chart of category choices among male users.
# `cat_men` has one row per category, with the user count stored in its
# 'Gender' column, so the frame and its column names are passed directly.
# `labels` must be a dict of column-name -> display-name overrides (the slice
# labels come from `names`); it is used here to give the count column a
# readable name in the hover text instead of being handed a Series.
fig = px.pie(cat_men,
             names='Category',
             values='Gender',
             labels={'Gender': 'Users'},
             title='Selection of Category for Males',
             hole=0.2)

fig.update_traces(textposition='inside', textfont_size=15)
fig.show()

## Most Selected Cities

In [91]:
# Number of users per city, most-selected city first.
cities = users['City'].value_counts()
cities.to_numpy()

array([6, 5, 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1])

In [92]:
# City names, in the same order as the counts above.
cities.index

Index(['Kanyakumari', 'Pondicherry', 'Bhopal', 'Varanasi', 'Alappuzha',
       'Amritsar', 'Agra', 'Panjim', 'Jodhpur', 'Kolkata', 'Srinagar', 'Pune',
       'Jaipur', 'New Delhi', 'Jaisalmer', 'Almora', 'Aurangabad', 'Manali',
       'Leh', 'Munnar', 'Mysuru (Mysore)', 'Kodaikanal', 'Darjeeling',
       'Hyderabad', 'Nainital', 'Udaipur', 'Hampi'],
      dtype='object')

In [93]:
# Horizontal bar chart with one bar per city; bar length and bar colour both
# encode the number of users who selected that city.
h_bar = px.bar(
    x=cities.values,
    y=cities.index,
    color=cities.values,
    color_continuous_scale='Viridis',
    orientation='h',
    title='Top Cities by Interested Tourists',
)

# Label the axes, hide the colour bar, and reverse the y-axis so the first
# (most-selected) city is drawn at the top.
h_bar.update_layout(
    xaxis_title='Number of Interested Tourists',
    yaxis_title='City',
    yaxis_autorange='reversed',
    coloraxis_showscale=False,
)
h_bar.show()

## City based on Gender

In [94]:
# Number of female users per city, most-selected city first.
city_women = users.loc[users['Gender'] == 'Female', 'City'].value_counts()

In [95]:
# Per-city counts for female users.
city_women.to_numpy()

array([3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [96]:
# City names, in the same order as the counts above.
city_women.index

Index(['Pondicherry', 'Kanyakumari', 'Bhopal', 'Jodhpur', 'Agra', 'Kolkata',
       'Aurangabad', 'Jaisalmer', 'Pune', 'Srinagar', 'Jaipur', 'Manali',
       'Kodaikanal', 'Varanasi', 'Amritsar', 'Alappuzha', 'New Delhi',
       'Panjim', 'Leh', 'Almora', 'Hyderabad'],
      dtype='object')

In [97]:
# Colour each bar by its tourist count, the same encoding the all-users city
# chart uses. The colours were previously drawn from an unseeded random
# generator, so the figure changed on every run and the colour carried no
# information. `colors` is still assigned in case a later cell reads it.
colors = city_women.values

h_bar = px.bar(x=city_women.values,
               y=city_women.index,
               orientation='h',
               color=colors,
               color_continuous_scale='Viridis',
               title='Top Cities by Interested Tourists - Female')

# Label the axes, hide the colour bar, and reverse the y-axis so the first
# (most-selected) city is drawn at the top.
h_bar.update_layout(xaxis_title='Number of Interested Tourists',
                    yaxis_title='City',
                    yaxis_autorange='reversed',
                    coloraxis_showscale=False)
h_bar.show()

In [98]:
# Number of male users per city, most-selected city first.
city_men = users.loc[users['Gender'] == 'Male', 'City'].value_counts()

In [99]:
# Colour each bar by its tourist count, the same encoding the all-users city
# chart uses. The colours were previously drawn from an unseeded random
# generator, so the figure changed on every run and the colour carried no
# information; the mid-notebook `import random` is no longer needed here.
# `colors` is still assigned in case a later cell reads it.
colors = city_men.values

h_bar = px.bar(x=city_men.values,
               y=city_men.index,
               orientation='h',
               color=colors,
               color_continuous_scale='Viridis',
               title='Top Cities by Interested Tourists - Male')

# Label the axes, hide the colour bar, and reverse the y-axis so the first
# (most-selected) city is drawn at the top.
h_bar.update_layout(xaxis_title='Number of Interested Tourists',
                    yaxis_title='City',
                    yaxis_autorange='reversed',
                    coloraxis_showscale=False)
h_bar.show()