# Microsoft Windows app store

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly

In [None]:
# Make "plotly_dark" the default Plotly template so the figures built in the
# cells below pick it up without setting a template on each one.
pio.templates.default = "plotly_dark"

In [None]:
# Relative path of the Windows Store CSV that this notebook analyses.
PATH = "../input/windows-store/msft.csv"

# Read the whole file into `df`; every later cell works on this DataFrame.
df = pd.read_csv(filepath_or_buffer=PATH)

<div class="list-group" id="list-tab" role="tablist">
  <h3 class="list-group-item list-group-item-action active" data-toggle="list"  role="tab" aria-controls="home">Table of Contents</h3>
<a class="list-group-item list-group-item-action" data-toggle="list" href="#one" role="tab" aria-controls="profile">Structure and Summary<span class="badge badge-primary badge-pill">1</span></a>
<a class="list-group-item list-group-item-action" data-toggle="list" href="#two" role="tab" aria-controls="messages">Handling Missing Values<span class="badge badge-primary badge-pill">2</span></a>
<a class="list-group-item list-group-item-action"  data-toggle="list" href="#three" role="tab" aria-controls="settings">Top Rated Apps<span class="badge badge-primary badge-pill">3</span></a>
<a class="list-group-item list-group-item-action" data-toggle="list" href="#four" role="tab" aria-controls="settings">Top rated apps based on number of ratings<span class="badge badge-primary badge-pill">4</span></a>
<a class="list-group-item list-group-item-action" data-toggle="list" href="#five" role="tab" aria-controls="settings">Bottom rated apps<span class="badge badge-primary badge-pill">5</span></a>  
<a class="list-group-item list-group-item-action" data-toggle="list" href="#six" role="tab" aria-controls="settings">Distribution of app categories<span class="badge badge-primary badge-pill">6</span></a>
<a class="list-group-item list-group-item-action" data-toggle="list" href="#seven" role="tab" aria-controls="settings">Timeline view<span class="badge badge-primary badge-pill">7</span></a>
<a class="list-group-item list-group-item-action" data-toggle="list" href="#eight" role="tab" aria-controls="settings">Price<span class="badge badge-primary badge-pill">8</span></a>
</div>

## <a id="one">Structure and Summary</a>

In [None]:
# Preview the first rows of the dataset (displayed as the cell output).
df.head()

In [None]:
# Preview the last rows of the dataset (displayed as the cell output).
df.tail()

In [None]:
# Print the DataFrame summary: column names, non-null counts and dtypes.
df.info()

In [None]:
# Show descriptive statistics for the dataset's columns.
df.describe()

## <a id="two">Handling Missing Values</a>

In [None]:
# Remove the row whose index label is 5321.
# NOTE(review): presumably this is the row holding the missing values that the
# section heading refers to -- confirm against the df.info()/df.tail() output.
df = df.drop(index=5321)

## <a id="three">Top Rated Apps</a>

In [None]:
# Ten rows with the highest "Rating"; selected once and reused for the bar
# lengths, the bar labels and the per-bar colours.
top_rated = df.nlargest(n=10, columns="Rating")

fig = px.bar(
    x=top_rated["Rating"],
    y=top_rated["Name"],
    color=top_rated["Name"].values,
    orientation="h",
    color_discrete_sequence=px.colors.qualitative.Prism,
)
# Slightly transparent bars with a white outline.
fig.update_traces(
    opacity=0.7,
    marker_line_color="rgb(255, 255, 255)",
    marker_line_width=2,
)
fig.update_yaxes(title="Names")
fig.update_xaxes(title="Ratings")
fig.update_layout(
    title="Top rated apps",
    width=800,
    height=600,
    showlegend=False,
)
fig.show()

In [None]:
# List every app whose "Rating" equals 5 (displayed as the cell output).
df.query("Rating == 5")

## <a id="four">Top rated apps based on number of ratings</a>

In [None]:
# Among the apps rated exactly 5, keep the ten with the most ratings.
# The selection is made once and shared by the axes, colours and bar labels.
most_reviewed_five_star = df.query("Rating == 5").nlargest(
    n=10, columns="No of people Rated"
)
review_counts = most_reviewed_five_star["No of people Rated"]

fig = px.bar(
    x=review_counts,
    y=most_reviewed_five_star["Name"],
    color=most_reviewed_five_star["Name"].values,
    text=review_counts,
    orientation="h",
    color_discrete_sequence=px.colors.qualitative.Prism,
)
# Slightly transparent bars with a white outline; counts are drawn inside the bars.
fig.update_traces(
    opacity=0.7,
    marker_line_color="rgb(255, 255, 255)",
    marker_line_width=2,
    textposition="inside",
)
fig.update_xaxes(title="Counts")
fig.update_yaxes(title="Names")
fig.update_layout(
    title="Top rated apps based on number of reviews",
    width=800,
    height=600,
    showlegend=False,
)
fig.show()

## <a id="five">Bottom rated apps</a>

In [None]:
# Ten rows with the lowest "Rating"; selected once and reused for the bar
# lengths, the bar labels and the per-bar colours.
lowest_rated = df.nsmallest(n=10, columns="Rating")

fig = px.bar(
    x=lowest_rated["Rating"],
    y=lowest_rated["Name"],
    color=lowest_rated["Name"].values,
    orientation="h",
    color_discrete_sequence=px.colors.qualitative.D3,
)
# Slightly transparent bars with a white outline.
fig.update_traces(
    opacity=0.7,
    marker_line_color="rgb(255, 255, 255)",
    marker_line_width=2,
)
fig.update_yaxes(title="Names")
fig.update_xaxes(title="Ratings")
fig.update_layout(
    title="Bottom rated apps",
    width=800,
    height=600,
    showlegend=False,
)
fig.show()

In [None]:
# Ten apps with the smallest "No of people Rated" value, regardless of rating.
# The selection is made once and shared by the axes, colours and bar labels.
least_reviewed = df.nsmallest(n=10, columns="No of people Rated")
review_counts = least_reviewed["No of people Rated"]

fig = px.bar(
    x=review_counts,
    y=least_reviewed["Name"],
    color=least_reviewed["Name"].values,
    text=review_counts,
    orientation="h",
    color_discrete_sequence=px.colors.qualitative.Safe,
)
# Slightly transparent bars with a white outline; counts are drawn inside the bars.
fig.update_traces(
    opacity=0.7,
    marker_line_color="rgb(255, 255, 255)",
    marker_line_width=2,
    textposition="inside",
)
fig.update_yaxes(title="Names")
fig.update_xaxes(title="Counts")
# The title previously repeated the "Top rated apps ..." wording of the earlier
# chart even though this figure shows the apps with the FEWEST reviews.
fig.update_layout(
    title="Apps with the fewest reviews",
    height=600,
    width=800,
    showlegend=False,
)
fig.show()

## <a id="six">Distribution of app categories</a>

In [None]:
# Per-category aggregates, each computed once instead of three times per trace.
# Number of apps in each category (value_counts orders by count, largest first).
category_counts = df["Category"].value_counts()
# Mean rating per category, highest first.
mean_rating = (
    df.groupby("Category")
    .agg({"Rating": "mean"})
    .sort_values("Rating", ascending=False)["Rating"]
)
# Mean number of ratings per category, highest first.
mean_review_count = (
    df.groupby("Category")
    .agg({"No of people Rated": "mean"})
    .sort_values("No of people Rated", ascending=False)["No of people Rated"]
)

fig = make_subplots(3, 1, subplot_titles=["Top app categories by count",
                                         "Top app categories by average rating",
                                         "Top app categories by average number of reviews"])

# One bar trace per aggregate, stacked vertically in rows 1..3.
for row_number, per_category in enumerate(
    (category_counts, mean_rating, mean_review_count), start=1
):
    fig.add_trace(
        go.Bar(
            x=per_category.index,
            y=per_category,
            text=per_category,
            marker=dict(color=px.colors.cyclical.mrybm),
        ),
        row=row_number,
        col=1,
    )

# Slightly transparent bars with a white outline; bar labels are formatted
# with the '.3s' template and drawn inside the bars.
fig.update_traces(
    opacity=0.7,
    marker_line_width=2,
    marker_line_color="rgb(255, 255, 255)",
    texttemplate="%{text:.3s}",
    textposition="inside",
)
fig.update_layout(height=1200, width=800, showlegend=False)
# Display explicitly, like every other figure cell, instead of relying on the
# notebook echoing the value of the last expression.
fig.show()

## <a id="seven">Timeline view</a>

In [None]:
# Convert every raw "Date" value with pd.to_datetime, one element at a time,
# and write the results back into the same column.
df.loc[:, "Date"] = df.loc[:, "Date"].apply(lambda raw: pd.to_datetime(raw))

# Number of apps per date, ordered chronologically.
daily_counts = df["Date"].sort_values().value_counts().sort_index()
# Running total of apps over time.
running_total = (
    df.groupby("Date").agg({"Date": "count"}).sort_index()["Date"].cumsum()
)
d3_palette = px.colors.qualitative.D3

fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=["Daily count of new apps", "Cumulative count of new apps"],
)
fig.add_trace(
    go.Scatter(
        x=daily_counts.index,
        y=daily_counts,
        marker=dict(color=d3_palette[6]),
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(
        x=running_total.index,
        y=running_total,
        marker=dict(color=d3_palette[4]),
    ),
    row=2,
    col=1,
)

# No grid lines on either subplot; spike lines only on the cumulative plot.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_xaxes(showspikes=True, row=2, col=1)
fig.update_yaxes(showspikes=True, row=2, col=1)

# `yaxis` is the first subplot's y axis: the daily-count plot is limited to 0-20.
fig.update_layout(
    showlegend=False,
    width=1000,
    height=800,
    yaxis=dict(range=[0, 20]),
)
fig.show()

## <a id="eight">Price</a>

In [None]:
df[df["Price"] == "Free"] = 0
df["Price"] = df["Price"].str.replace("₹ ", "")
df["Price"] = df["Price"].str.replace(",","")
df["Price"].fillna(0, inplace=True)
df["Price"] = df["Price"].astype(float)

In [None]:
fig = make_subplots(2,1, subplot_titles=["Top 10 most expensive apps", 
                                         "Top 10 most bang for buck apps (excluding free apps)"])
fig.add_trace(go.Bar(x=df.nlargest(10, columns="Price")["Price"].values, 
                     y=df.nlargest(10, columns="Price")["Name"], 
                     text=df.nlargest(10, columns="Price")["Price"].values,
                     orientation='h', 
                     marker=dict(color=px.colors.sequential.Blugrn*2)), 
              row=1, col=1)
fig.add_trace(go.Bar(x=df.query("Rating == 5").nsmallest(10, columns="Price")["Price"], 
                     y=df.query("Rating == 5").nsmallest(10, columns="Price")["Name"], 
                     text=df.query("Rating == 5").nsmallest(10, columns="Price")["Price"],
                     orientation='h', 
                     marker=dict(color=px.colors.sequential.amp*2)), 
              row=2, col=1)
fig.update_xaxes(title="Price", showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_traces(opacity=0.7, 
                  marker_line_width=2.5,
                  marker_line_color='rgb(255, 255, 255)', 
                  textposition="inside")
fig.update_layout(height=1000, 
                  showlegend=False)
fig.show()