# Peeking Into Windows Store

## Libraries Used

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
import plotly.colors

import datetime

from statistics import mean

## Data Preparation 

In [None]:
# Load the Windows Store csv from its relative input path into `df`,
# then preview the first two rows.
csv_path = '../input/windows-store/msft.csv'
df = pd.read_csv(csv_path)
df.head(2)

In [None]:
# Count the missing entries in each column
df.isna().sum()

In [None]:
# Discard every row that has at least one missing value; `df` itself is
# modified (no new frame is returned).
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Turn the textual Price column into floats: 'Free' becomes '0', commas and
# the rupee sign are removed, and the remaining text is cast to float.
df['Price'] = (
    df['Price']
    .str.replace('Free', '0')
    .str.replace(',', '')
    .str.replace('₹', '')
    .astype(float)
)

In [None]:
# Label each app 'Free' when its price is exactly zero and 'Paid' otherwise,
# then preview the first two rows with the new column.
is_free = df['Price'].eq(0.0)
df['Free/Paid'] = np.where(is_free, 'Free', 'Paid')
df.head(2)

## Exploratory Data Analysis

### Categories: Which category has the most apps in the Windows Store?

In [None]:
# Preparing data for bar chart: one row per category with its app count.
# The index and the counts are named explicitly instead of renaming the
# columns that value_counts()/reset_index() happen to produce; those labels
# differ between pandas 1.x ('index'/'Category') and 2.x ('Category'/'count'),
# which made the previous rename-based approach fail on newer pandas.
data = (
    df['Category']
    .value_counts()
    .rename_axis('Category')
    .reset_index(name='Count')
)

# Plot bar graph: count on the x axis, category on the y axis, coloured by count
fig = px.bar(data, x='Count', y='Category', color='Count',
             title='Categories of App in Windows Store')
fig.show()

### Free vs Paid App

In [None]:
# Count how many apps are free and how many are paid.
# The index and the counts are named explicitly so the resulting columns are
# always 'Free/Paid' and 'Count', regardless of how the installed pandas
# version labels the output of value_counts()/reset_index().
data = (
    df['Free/Paid']
    .value_counts()
    .rename_axis('Free/Paid')
    .reset_index(name='Count')
)

# Pie chart of the free/paid split, using two colours from the Rainbow scale
fig = px.pie(data, values='Count', names='Free/Paid',
             title='Free and Paid App in Windows Store',
             color_discrete_sequence=px.colors.sequential.Rainbow[3:5])
fig.show()

### Free and Paid App in Each Category

In [None]:
# Count apps for every (Category, Free/Paid) combination; the bar chart below
# reads the counts from the 'size' column of the grouped result.
data = df.groupby(['Category', 'Free/Paid'], as_index=False).size()

# Bar chart of the counts per category, coloured by Free/Paid
fig = px.bar(
    data,
    x='Category',
    y='size',
    color='Free/Paid',
    title='Free and Paid App in Each Category',
)
fig.show()

### Ratings: What is the performance of the app?

In [None]:
# Mean of the Rating column, rounded to three decimals
average_rating = round(mean(df['Rating']), 3)
print('Average rating:', average_rating)

In [None]:
# Histogram of the Rating column across all apps
ratings = df['Rating']
fig = px.histogram(ratings, x='Rating', title='Overall Ratings')
fig.show()

### How do the apps perform in each category?

In [None]:
# One box per category showing the spread of ratings
fig = px.box(
    df,
    x='Category',
    y='Rating',
    color='Category',
    title='Rating for Each Category',
)
# Set the quartile computation method explicitly to 'linear'
fig.update_traces(quartilemethod='linear')
fig.show()

### Rating vs Price

In [None]:
# Scatter plot of Rating (y axis) against Price (x axis), one point per app
fig = px.scatter(
    df,
    x='Price',
    y='Rating',
    title='Rating and Price Trend',
)
fig.show()

### Price Trend for Each Category

In [None]:
# Scatter plot of Price per category, coloured by category
fig = px.scatter(
    df,
    x='Category',
    y='Price',
    color='Category',
    title='Price for Each Category',
)
fig.show()

### Correlation Heatmap

In [None]:
# Pairwise correlation between the numeric columns only.
# Text columns are excluded explicitly: pandas >= 2.0 no longer drops them
# silently and raises when asked to correlate strings. select_dtypes is used
# rather than corr(numeric_only=True) so this also runs on older pandas.
numeric_columns = df.select_dtypes(include=['number', 'bool'])
data = numeric_columns.corr()

# Render the correlation matrix as a heatmap
fig = px.imshow(data, title='Heatmap')
fig.show()

### Conclusion

* **Music, Books and Business** are the top categories in Windows Store
* Only the **Books, Business and Developer Tools** categories contain **paid** apps.
* Up to **97%** of the apps in Windows Store are **free**.
* The overall rating of apps in the Windows Store is **3.793**.
* There is **not** much correlation between Rating, No of people Rated and Price.