# Google Playstore

## Load in Dataset

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import seaborn as sns

# Load the Play Store export; the path is relative to the notebook's working directory.
df = pd.read_csv('data/Google-Playstore.csv')

# DataFrame.info() writes its summary to stdout on its own and returns None,
# so wrapping it in print() only appends a stray "None" line to the output.
df.info()

# Last expression of the cell: rendered as a preview of the first rows.
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1118136 entries, 0 to 1118135
Data columns (total 23 columns):
 #   Column             Non-Null Count    Dtype  
---  ------             --------------    -----  
 0   App Name           1118135 non-null  object 
 1   App Id             1118136 non-null  object 
 2   Category           1118133 non-null  object 
 3   Rating             1111286 non-null  float64
 4   Rating Count       1111286 non-null  float64
 5   Installs           1117975 non-null  object 
 6   Minimum Installs   1117975 non-null  float64
 7   Maximum Installs   1118136 non-null  int64  
 8   Free               1118136 non-null  bool   
 9   Price              1118136 non-null  float64
 10  Currency           1117975 non-null  object 
 11  Size               1118136 non-null  object 
 12  Minimum Android    1116123 non-null  object 
 13  Developer Id       1118134 non-null  object 
 14  Developer Website  703770 non-null   object 
 15  Developer Email    1118114 non-n

Unnamed: 0,App Name,App Id,Category,Rating,Rating Count,Installs,Minimum Installs,Maximum Installs,Free,Price,...,Developer Id,Developer Website,Developer Email,Released,Last Updated,Content Rating,Privacy Policy,Ad Supported,In App Purchases,Editors Choice
0,HTTrack Website Copier,com.httrack.android,Communication,3.6,2848.0,"100,000+",100000.0,351560,True,0.0,...,Xavier Roche,http://www.httrack.com/,roche+android@httrack.com,"Aug 12, 2013","May 20, 2017",Everyone,http://android.httrack.com/privacy-policy.html,False,False,False
1,World War 2: Offline Strategy,com.skizze.wwii,Strategy,4.3,17297.0,"1,000,000+",1000000.0,2161778,True,0.0,...,Skizze Games,http://stereo7.com/,Skizze.Games@gmail.com,"Jul 19, 2018","Nov 26, 2020",Everyone 10+,https://www.iubenda.com/privacy-policy/8032781,True,True,False
2,WPSApp,com.themausoft.wpsapp,Tools,4.2,488639.0,"50,000,000+",50000000.0,79304739,True,0.0,...,TheMauSoft,http://www.themausoft.com,wpsapp.app@gmail.com,"Mar 7, 2016","Oct 21, 2020",Everyone,https://sites.google.com/view/wpsapppolicy/main,True,False,False
3,"OfficeSuite - Office, PDF, Word, Excel, PowerP...",com.mobisystems.office,Business,4.2,1224420.0,"100,000,000+",100000000.0,163660067,True,0.0,...,MobiSystems,http://www.mobisystems.com,support-officesuite-android@mobisystems.com,"Dec 22, 2011","Nov 23, 2020",Everyone,http://www.mobisystems.com/mobile/privacy-poli...,True,True,False
4,Loud Player Free,com.arthelion.loudplayer,Music & Audio,4.2,665.0,"50,000+",50000.0,73463,True,0.0,...,Arthelion92,http://www.arthelion.com,arthelion92@gmail.com,"Sep 24, 2016","Nov 22, 2020",Everyone,http://www.arthelion.com/index.php/fr/android-...,False,False,False


## Remove irrelevant columns

In [2]:
# Discard the columns this notebook treats as irrelevant.
# Note: the cell rebinds `df`, so running it a second time raises KeyError
# because the labels are already gone.
df = df.drop(
    columns=[
        'App Id',
        'Currency',
        'Developer Website',
        'Developer Email',
        'Privacy Policy',
    ]
)

## Remove paid apps

In [3]:
# Keep only free apps. `Free` is a non-null boolean column (see the df.info()
# output above), so it can be used directly as the row mask instead of being
# compared against the literal False.
df = df[df['Free']]

## Count apps per category

In [4]:
# Number of remaining apps in each category, most frequent category first.
category_counts = df['Category'].value_counts()
print(category_counts)

Education                  108332
Music & Audio              102787
Entertainment               80644
Books & Reference           75565
Personalization             66213
Tools                       64811
Lifestyle                   53248
Business                    41534
Health & Fitness            30271
Productivity                28638
Photography                 28299
Travel & Local              24546
Finance                     24287
Food & Drink                24003
Puzzle                      23695
News & Magazines            21421
Sports                      20869
Shopping                    20342
Casual                      19877
Communication               17766
Arcade                      16964
Social                      16715
Simulation                  14791
Art & Design                12156
Action                      12089
Medical                     11131
Educational                 10248
Maps & Navigation            9757
Adventure                    9300
Video Players 

In [5]:
# Show the apps ordered from most to fewest installs.
# The sorted frame is only displayed; it is not assigned, so `df` keeps its original row order.
df.sort_values('Maximum Installs', ascending=False)

Unnamed: 0,App Name,Category,Rating,Rating Count,Installs,Minimum Installs,Maximum Installs,Free,Price,Size,Minimum Android,Developer Id,Released,Last Updated,Content Rating,Ad Supported,In App Purchases,Editors Choice
11491,Google Play services,Tools,4.2,32425599.0,"10,000,000,000+",1.000000e+10,10772700105,True,0.0,Varies with device,Varies with device,Google LLC,"May 24, 2012","Nov 19, 2020",Everyone,False,False,False
220811,YouTube,Video Players & Editors,4.1,88579975.0,"5,000,000,000+",5.000000e+09,8571451598,True,0.0,Varies with device,Varies with device,Google LLC,"Oct 20, 2010","Nov 25, 2020",Teen,True,False,False
220810,Google,Tools,4.2,17237577.0,"5,000,000,000+",5.000000e+09,8008951294,True,0.0,Varies with device,Varies with device,Google LLC,"Aug 12, 2010","Nov 27, 2020",Everyone,True,False,False
220806,Maps - Navigate & Explore,Travel & Local,4.1,12451778.0,"5,000,000,000+",5.000000e+09,7991928686,True,0.0,Varies with device,Varies with device,Google LLC,,"Nov 19, 2020",Everyone,True,False,True
39,Google Text-to-Speech,Tools,4.3,2292232.0,"5,000,000,000+",5.000000e+09,7769956479,True,0.0,Varies with device,Varies with device,Google LLC,"Oct 10, 2013","Oct 15, 2020",Everyone,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277468,Happy Easter Greeting Cards @ E-Cards,Social,0.0,0.0,0+,0.000000e+00,0,True,0.0,16M,4.4 and up,New Wave Apps Dev,"May 3, 2020","May 04, 2020",Everyone,True,False,False
296842,AppLock Theme Scorpio,Personalization,0.0,0.0,0+,0.000000e+00,0,True,0.0,1.6M,4.0 and up,AppLock@DoMobile,"Nov 27, 2020","Nov 27, 2020",Everyone,False,False,False
818832,iTee - Shirts for Men and Girls (Merry Christ...,Shopping,0.0,0.0,0+,0.000000e+00,0,True,0.0,5.0M,4.4 and up,Lighthouse Lamp Apps,"Nov 13, 2020","Nov 26, 2020",Everyone,False,False,False
832583,NICHEWallet,Education,0.0,0.0,0+,0.000000e+00,0,True,0.0,22M,4.0 and up,ImadeApplications,"Oct 26, 2018","Oct 26, 2018",Everyone,False,False,False
