# Intro to Python and Data
In this overview we walk through some basic principles of programming in Python, as well as exploring data.

# NumPy Arrays
High-performance N-dimensional arrays implemented in C

In [1]:
import numpy as np

In [2]:
# Allocate a 100 x 100 grid filled with integer zeros.
grid_shape = (100, 100)
fixed_array = np.zeros(grid_shape, dtype=int)

In [3]:
# Show the array; NumPy abbreviates large arrays with "..." in the printed output.
fixed_array

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

## Working with Arrays

In [4]:
# Select a single row: index 1 on the first axis, every column on the second
fixed_array[1, :]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [5]:
# Select a single value: the first index is the row, the second is the column
row, col = 1, 2
fixed_array[row, col]

0

In [6]:
# Seed the generator so the same "random" values come back on every
# Restart & Run All; an unseeded draw changes each time the cell runs.
rng = np.random.default_rng(42)

# 3 rows x 2 columns of floats drawn uniformly from [0, 1)
rand_array = rng.random((3, 2))
rand_array

array([[0.53751898, 0.29898055],
       [0.63104892, 0.86667857],
       [0.29978282, 0.18235668]])

# Loading Data
Data source: [17K Apple App Store Strategy Games](https://www.kaggle.com/tristan581/17k-apple-app-store-strategy-games) on Kaggle.


## Pandas DataFrames
A DataFrame is an object representing a data table, with labeled columns and functions for transforming them

> RESOURCE: useful [reference sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf)

<img src="https://cdn-images-1.medium.com/max/1600/1*JjZYjvyBurwgQa1RBRtzAA.png" style="width: 70%" />

# Creating a Dataframe

In [7]:
import pandas as pd

In [8]:
# Build a small table from a dict: each key becomes a column name and
# each list supplies that column's values, top to bottom.
column_values = {
    "colA": [1, 2, 3],
    "colB": [4, 5, 6],
}
df = pd.DataFrame(data=column_values)
df

Unnamed: 0,colA,colB
0,1,4
1,2,5
2,3,6


# Importing Data

In [9]:
# Path to the App Store games CSV (relative, so it resolves against the
# directory the notebook is run from).
APPSTORE_CSV = "data/appstore_games.csv"
data = pd.read_csv(APPSTORE_CSV)

In [10]:
# Preview the first 3 rows of the table
data.head(n=3)

Unnamed: 0,URL,ID,Name,Subtitle,Icon URL,Average User Rating,User Rating Count,Price,In-app Purchases,Description,Developer,Age Rating,Languages,Size,Primary Genre,Genres,Original Release Date,Current Version Release Date
0,https://apps.apple.com/us/app/sudoku/id284921427,284921427,Sudoku,,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,4.0,3553.0,2.99,,"Join over 21,000,000 of our fans and download ...",Mighty Mighty Good Games,4+,"DA, NL, EN, FI, FR, DE, IT, JA, KO, NB, PL, PT...",15853568.0,Games,"Games, Strategy, Puzzle",11/07/2008,30/05/2017
1,https://apps.apple.com/us/app/reversi/id284926400,284926400,Reversi,,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,3.5,284.0,1.99,,"The classic game of Reversi, also known as Oth...",Kiss The Machine,4+,EN,12328960.0,Games,"Games, Strategy, Board",11/07/2008,17/05/2018
2,https://apps.apple.com/us/app/morocco/id284946595,284946595,Morocco,,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,3.0,8376.0,0.0,,Play the classic strategy game Othello (also k...,Bayou Games,4+,EN,674816.0,Games,"Games, Board, Strategy",11/07/2008,5/09/2017


In [11]:
# Preview the last 3 rows of the table
data.tail(n=3)

Unnamed: 0,URL,ID,Name,Subtitle,Icon URL,Average User Rating,User Rating Count,Price,In-app Purchases,Description,Developer,Age Rating,Languages,Size,Primary Genre,Genres,Original Release Date,Current Version Release Date
17004,https://apps.apple.com/us/app/rabbit-vs-tortoi...,1474962324,Rabbit Vs Tortoise,,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,,,0.0,,"""Rabbit Vs Tortoise is chess type cool simple ...",Vishal Baldha,4+,EN,23207936.0,Games,"Games, Strategy",3/08/2019,3/08/2019
17005,https://apps.apple.com/us/app/fatall/id1474963671,1474963671,FaTaLL,Most fun game!!!,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,,,0.0,"9.99, 49.99, 3.99",Upgrade your character and use your skills to ...,Tayrem Games,4+,EN,196750336.0,Games,"Games, Strategy, Action",1/08/2019,1/08/2019
17006,https://apps.apple.com/us/app/the-three-kingdo...,1475076711,The Three Kingdoms :Bomb,,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,,,0.0,,The protagonist uses the method of placing a b...,ming bo tang,4+,"EN, ZH",22952960.0,Games,"Games, Strategy, Puzzle",2/08/2019,2/08/2019


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# The count row differs between columns, which shows that some columns have missing values.
data.describe()

Unnamed: 0,ID,Average User Rating,User Rating Count,Price,Size
count,17007.0,7561.0,7561.0,16983.0,17006.0
mean,1059614000.0,4.060905,3306.531,0.813419,115706400.0
std,299967600.0,0.751428,42322.56,7.835732,203647700.0
min,284921400.0,1.0,5.0,0.0,51328.0
25%,899654300.0,3.5,12.0,0.0,22950140.0
50%,1112286000.0,4.5,46.0,0.0,56768950.0
75%,1286983000.0,4.5,309.0,0.0,133027100.0
max,1475077000.0,5.0,3032734.0,179.99,4005591000.0


# Working With Data

In [13]:
# Select a single column by its label, then preview its first rows
price = data["Price"]
price.head()

0    2.99
1    1.99
2    0.00
3    0.00
4    2.99
Name: Price, dtype: float64

In [14]:
# Select a column with a condition.
# A single .loc[rows, column] lookup replaces the chained form
# data['Price'][mask]: the boolean mask picks the rows and the label picks
# the column in one indexing step.
price_over_2 = data["Price"] > 2
data.loc[price_over_2, "Price"].head(8)

0     2.99
4     2.99
26    2.99
29    2.99
30    2.99
32    5.99
33    7.99
35    2.99
Name: Price, dtype: float64