# Data Preprocessing of the Google Play Store Dataset

In [2]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

## HANDLING AND REMOVING NULL VALUES

In [6]:
# Read the raw Play Store CSV into a DataFrame, then show the per-column
# count of missing values as this cell's output.
df = pd.read_csv("googleplaystore.csv")
df.isna().sum()

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       1
Genres               0
Last Updated         0
Current Ver          8
Android Ver          3
dtype: int64

In [8]:
# Fill missing ratings with the column mean, then discard every row that
# still has a missing value in any other column.
# NOTE(review): the mean is fitted before dropna(), so rows removed below
# still contribute to the imputed value — confirm that is intended.
rating_slice = slice(2, 3)  # column position 2 is 'Rating'; 2-D slice keeps the shape the imputer expects
rating_values = df.iloc[:, rating_slice].values
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
df.iloc[:, rating_slice] = imputer.fit_transform(rating_values)
df = df.dropna()
# Re-check the per-column null counts; all should now be zero.
df.isnull().sum()

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              0
Price             0
Content Rating    0
Genres            0
Last Updated      0
Current Ver       0
Android Ver       0
dtype: int64

## Finding Average Rating

In [9]:
# Mean app rating over the cleaned frame. Series.mean() is vectorized and
# yields NaN for an empty frame instead of raising ZeroDivisionError the way
# the manual running-sum / len(...) division did.
avg = df['Rating'].mean()
print(f"Average rating is: {avg:.2f}")


Average rating is: 4.19


## Finding the number of ratings between 4 and 4.5 (inclusive)

In [10]:
# Count ratings in the closed interval [4, 4.5]. Series.between is inclusive
# on both ends by default, matching the original >= / <= comparison, and
# replaces the per-element Python loop with a vectorized boolean mask.
c = int(df['Rating'].between(4, 4.5).sum())
print(f"Number of ratings between 4 and 4.5 is: {c}")


Number of ratings between 4 and 4.5 is: 6915


## Counting the number of paid and free apps

In [11]:
# List every distinct value present in the 'Type' column, one per line.
type_labels = df['Type'].unique()
for label in type_labels:
    print(label)

Free
Paid


In [12]:
# Tally apps per pricing type in one vectorized pass.
type_counts = df['Type'].value_counts()
# Look up each label explicitly: the previous if/else counted every value
# that was not exactly 'Free' (including any unexpected label) as a paid app.
f = int(type_counts.get('Free', 0))
p = int(type_counts.get('Paid', 0))
print(f"Number of free apps: {f}\nNumber of paid apps: {p}")

Number of free apps: 10032
Number of paid apps: 797


## Total number of apps in each 'Category'

In [13]:
# Count rows per category in a single vectorized pass instead of rescanning
# the whole column once for every distinct category.
category_counts = df['Category'].value_counts()
# Build the dict in order of first appearance (the order unique() returns)
# and convert to plain ints so the displayed dict looks the same as before.
categories = {
    name: int(category_counts.get(name, 0))
    for name in df['Category'].unique()
}
categories

{'ART_AND_DESIGN': 64,
 'AUTO_AND_VEHICLES': 85,
 'BEAUTY': 53,
 'BOOKS_AND_REFERENCE': 230,
 'BUSINESS': 460,
 'COMICS': 60,
 'COMMUNICATION': 387,
 'DATING': 234,
 'EDUCATION': 156,
 'ENTERTAINMENT': 149,
 'EVENTS': 64,
 'FINANCE': 366,
 'FOOD_AND_DRINK': 127,
 'HEALTH_AND_FITNESS': 341,
 'HOUSE_AND_HOME': 88,
 'LIBRARIES_AND_DEMO': 84,
 'LIFESTYLE': 382,
 'GAME': 1144,
 'FAMILY': 1968,
 'MEDICAL': 463,
 'SOCIAL': 295,
 'SHOPPING': 260,
 'PHOTOGRAPHY': 335,
 'SPORTS': 384,
 'TRAVEL_AND_LOCAL': 258,
 'TOOLS': 841,
 'PERSONALIZATION': 390,
 'PRODUCTIVITY': 424,
 'PARENTING': 60,
 'WEATHER': 82,
 'VIDEO_PLAYERS': 175,
 'NEWS_AND_MAGAZINES': 283,
 'MAPS_AND_NAVIGATION': 137}

In [14]:
# Show the cleaned DataFrame as this cell's output.
df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.100000,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.900000,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.700000,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.500000,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.300000,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,FAMILY,4.500000,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.000000,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,MEDICAL,4.193338,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.500000,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device
