## IMPORTING DEPENDENCIES

In [322]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

## LOADING THE DATASET

In [323]:
dataset=pd.read_csv("googleplaystore.csv")
dataset.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


## GETTING INFO ABOUT DATASET

In [324]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9369 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10841 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


## DROPPING UNUSED COLUMNS

In [325]:
dataset.drop(columns=['Genres','Last Updated','Android Ver','Current Ver'],inplace=True)

## CHECKING FOR NAN VALUES

In [326]:
dataset.isnull().sum()
dataset=dataset.dropna()
print(dataset.isnull().sum(),len(dataset))

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              0
Price             0
Content Rating    0
dtype: int64 9369


## DATA PREPROCESSING

In [327]:
def CommaRemove(x):
    x=x.replace(",","")
    return x

dataset['Installs']=dataset['Installs'].apply(lambda x:x[:-1])
dataset['Installs']=dataset['Installs'].apply(lambda x:CommaRemove(x))

dataset


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,10000,Free,0,Everyone
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0,Everyone
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,5000000,Free,0,Everyone
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,50000000,Free,0,Teen
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,100000,Free,0,Everyone
...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6M,500,Free,0,Everyone
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,5000,Free,0,Everyone
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100,Free,0,Everyone
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,1000,Free,0,Mature 17+


In [328]:
dataset['Installs']=pd.to_numeric(dataset['Installs'],errors='coerce')
dataset['Reviews']=pd.to_numeric(dataset['Reviews'],errors='coerce')

In [357]:
def Helper(x):
    if x[-1]=='k':
        x=float(x[:-1])
        x=x/1000
    elif x[-1]=='M':
        x=float(x[:-1])
    return x
dataset['Size']=pd.to_numeric(dataset['Size'].apply(lambda x:Helper(x)),errors='coerce')

## DATA ANAYSIS

#### List out the data with rating greater than 4.5 and category belongs to communication and also installs 1M

In [361]:
dataset[dataset.eval("Rating>4.5 & Category=='COMMUNICATION' & Installs>=1000000")]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating
371,Google Duo - High Quality Video Calls,COMMUNICATION,4.6,2083237.0,,500000000.0,Free,0,Everyone
413,"CM Browser - Ad Blocker , Fast Download , Privacy",COMMUNICATION,4.6,2264916.0,6.1,50000000.0,Free,0,Everyone
438,Should I Answer?,COMMUNICATION,4.8,237468.0,8.8,1000000.0,Free,0,Everyone
455,Email TypeApp - Mail App,COMMUNICATION,4.6,183374.0,44.0,1000000.0,Free,0,Everyone
459,Mail.Ru - Email App,COMMUNICATION,4.6,837842.0,,50000000.0,Free,0,Everyone
471,JusTalk - Free Video Calls and Fun Video Chat,COMMUNICATION,4.6,191032.0,26.0,5000000.0,Free,0,Everyone
4039,Google Duo - High Quality Video Calls,COMMUNICATION,4.6,2083237.0,,500000000.0,Free,0,Everyone
4739,WhatsCall Free Global Phone Call App & Cheap C...,COMMUNICATION,4.6,1130966.0,27.0,10000000.0,Free,0,Everyone
4949,"CM Browser - Ad Blocker , Fast Download , Privacy",COMMUNICATION,4.6,2265084.0,6.1,50000000.0,Free,0,Everyone
7543,CM Transfer - Share any files with friends nearby,COMMUNICATION,4.6,71740.0,5.8,5000000.0,Free,0,Everyone


#### Listing the app with maximum size

In [359]:
dataset[dataset['Size']==max(dataset['Size'])]

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating
1080,Post Bank,FINANCE,4.5,60449.0,100.0,1000000.0,Free,0,Everyone
1565,Talking Babsy Baby: Baby Games,LIFESTYLE,4.0,140995.0,100.0,10000000.0,Free,0,Everyone
1758,Hungry Shark Evolution,GAME,4.5,6074334.0,100.0,100000000.0,Free,0,Teen
1793,Mini Golf King - Multiplayer Game,GAME,4.5,531458.0,100.0,5000000.0,Free,0,Everyone
1988,Hungry Shark Evolution,GAME,4.5,6074627.0,100.0,100000000.0,Free,0,Teen
2299,Navi Radiography Pro,MEDICAL,4.7,11.0,100.0,500.0,Paid,$15.99,Everyone
3973,Hungry Shark Evolution,GAME,4.5,6071542.0,100.0,100000000.0,Free,0,Teen
4690,Vi Trainer,HEALTH_AND_FITNESS,3.6,124.0,100.0,5000.0,Free,0,Everyone
5427,Ultimate Tennis,SPORTS,4.3,183004.0,100.0,10000000.0,Free,0,Everyone
5530,The Walking Dead: Our World,GAME,4.0,22435.0,100.0,1000000.0,Free,0,Teen


#### Most Reviewed App

In [318]:
dataset[dataset['Reviews']==max(dataset['Reviews'])]['App']

2544    Facebook
Name: App, dtype: object

#### Highest Rating Apps

In [243]:
dataset[dataset['Rating']==min(dataset['Rating'])]['App']

625                               House party - live chat
4127                                    Speech Therapy: F
5151                                        Clarksburg AH
5978                        Truck Driving Test Class 3 BC
6319                     BJ Bridge Standard American 2018
6490                                               MbH BM
7144                                        CB Mobile Biz
7383                                       Thistletown CI
7427                                       CJ DVD Rentals
7806                                          CR Magazine
7926                                 Tech CU Card Manager
8820                                       DS Creator 2.0
8875                                       DT future1 cam
10324                      FE Mechanical Engineering Prep
10400             Familial Hypercholesterolaemia Handbook
10591    Lottery Ticket Checker - Florida Results & Lotto
Name: App, dtype: object

## DATA VISUALIZATION

In [244]:
# DATA VISUALIZATIOn
fig=px.pie(dataset,names="Type")
fig.show()

In [245]:
# fig=px.pie(dataset,names="Content Rating",title="TYPE OF CONTENT RATING",color_discrete_sequence=px.colors.sequential.OrRd_r)
# fig=go.Figure(data=)
fig=go.Figure(data=[go.Pie(labels=dataset['Content Rating'],title="TYPE OF CONTENT RATING",textinfo='label+percent',pull=[0.1])])
fig.show()

In [246]:
popular_app=dataset[dataset['Installs']==100000000]

In [247]:
fig=px.bar(popular_app,x='Category',color='Category',hover_name='App')
fig.show()

In [248]:
random_dataset=dataset.sample(n=50)


In [249]:
fig=px.scatter(random_dataset,x='Installs',y='Rating',size_max=100,hover_name='App',size="Reviews",color='Category')
fig.show()

In [250]:
counts=dataset['Category'].value_counts()

In [251]:
fig=px.bar(counts,x='Category',color='Category')
fig.show()
