# Numerical Tools
Covers: NumPy & pandas

## Numpy

#### Creating Numpy Arrays

In [None]:
import numpy as np

# np.array builds a numpy.ndarray, a compact container that is more
# efficient and faster to compute with than a plain Python list
arr = np.array([1, 2, 3, 4, 5])
# Tuples and other array-like objects are converted the same way
arr = np.array((1, 2, 3, 4, 5))

# Two dimensions: a sequence of equal-length rows
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Three dimensions: a sequence of 2-D blocks (deeper nesting adds more axes)
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])

# The 'ndim' attribute holds the number of dimensions of an array
print(arr3d.ndim)

#### Accessing Numpy Arrays

In [None]:
# Index by position; a 2-D array takes one index per axis
x = arr[0]
x2 = arr2d[0, 1]  # row 0, column 1

# Iterating over a 1-D array yields its elements
for num in arr:
    print(num)

# Iterating over a 2-D array yields one row (a 1-D array) per step, so the
# inner loop has to walk the current row to reach the individual elements
for row in arr2d:
    for col in row:
        print(col)

# A single loop over a 2-D array prints each row as a whole
for j in arr2d:
    print(j)

#### Sorting Numpy Arrays

In [None]:
# np.sort returns a sorted copy and leaves its input untouched. `arr` is already
# in ascending order, so an unsorted array is used here to make that visible.
unsorted = np.array([3, 1, 5, 2, 4])
sorted_copy = np.sort(unsorted)
print(sorted_copy)  # [1 2 3 4 5]
print(unsorted)     # [3 1 5 2 4] -- the original order is unchanged

#### Calculation between two arrays

In [None]:
# Two 1-D arrays of the same length
arr_x, arr_y = np.array([1, 2, 3, 4]), np.array([4, 5, 6, 7])

# Array + array: elements are added position by position
pairwise_sum = arr_x + arr_y
print(pairwise_sum)

# Array + number: the number is added to every element
shifted = arr_x + 2
print(shifted)

#### Element-wise multiplication

In [None]:
# A plain Python list; NumPy converts it to an array when it meets `arr2d`
arr1d = [1, 2, 3]

# Broadcasting: the three values are matched against every row of the 2-D
# array, then the multiplication is carried out element by element.
# Each result is printed because a notebook cell only shows its last expression.
print(arr2d * arr1d)

# Operands of identical shape: plain element-wise multiplication
print(arr2d * arr2d)

#### Matrix Multiplication

In [None]:
# np.mat was removed in NumPy 2.0; np.asmatrix is the equivalent spelling and
# also exists in older releases. It wraps the data in a numpy.matrix object.
matrix2d = np.asmatrix(arr2d)

# For numpy.matrix objects `*` means matrix multiplication. The inner
# dimensions must agree, so the 2x3 matrix is multiplied by its 3x2 transpose
# (multiplying it by itself raises a ValueError).
print(matrix2d * matrix2d.T)

# numpy.matrix is discouraged in current NumPy; on regular arrays the `@`
# operator performs the same matrix multiplication
print(arr2d @ arr2d.T)

## Pandas

#### Making a dataframe

In [None]:
import pandas as pd
# Sample data: one entry per team and season, split into parallel lists
football_data = {
    'year': [
        2010, 2011, 2012,
        2010, 2011, 2012,
        2010, 2011, 2012,
    ],
    'team': [
        'FCBarcelona', 'FCBarcelona', 'FCBarcelona',
        'RMadrid', 'RMadrid', 'RMadrid',
        'ValenciaCF', 'ValenciaCF', 'ValenciaCF',
    ],
    'wins': [30, 28, 32, 29, 32, 26, 21, 17, 19],
    'draws': [6, 7, 4, 5, 4, 7, 8, 10, 8],
    'losses': [2, 3, 2, 4, 2, 5, 9, 11, 11],
}

# pd.DataFrame accepts many input types; a dictionary of equal-length lists
# becomes one column per key
df = pd.DataFrame(football_data)

#### Getting specific parts of dataframe

In [None]:
# A notebook cell only shows its last expression, so the earlier results are
# passed to display() (available by default in Jupyter) to make them visible.

# First 2 rows
display(df.head(2))

# Slicing works like Python list slicing: from position 2 to the end
display(df[2:])

# A single column label returns the whole 'team' column
display(df['team'])

# A list of labels returns several columns
df[['team', 'wins', 'draws']]

#### Adding a new column

In [None]:
# Assigning to a column name that does not exist yet creates that column.
# Its values here are the index labels, each increased by one.
index_plus_one = np.asarray(df.index.tolist()) + 1
df['m_index'] = index_plus_one

#### Basic Descriptions & Statistics

In [None]:
# Basic statistics for the numerical columns of the dataframe.
# display() is used because a notebook cell only shows its last expression.
display(df.describe())

# Column means. numeric_only=True skips the text column 'team'; without it
# pandas 2.0+ raises a TypeError instead of silently dropping that column.
display(df.mean(numeric_only=True))

# Prints a concise summary of the dataframe: index range, column names,
# non-null counts, dtypes and memory usage
df.info()

#### Series vs. Dataframe Returns

In [None]:
# A single label returns a Series. display() makes it visible, since a
# notebook cell only shows its last expression on its own.
display(df['wins'])

# A list of labels (even a list of one) returns a DataFrame
df[['wins']]

#### Extracting data

In [None]:
# Element-wise test: returns a boolean Series, True where the team is 'RMadrid'.
# display() is used because a notebook cell only shows its last expression.
display(df['team'] == 'RMadrid')

# Boolean indexing keeps the rows whose mask value is True. `|` is the
# element-wise OR, and each condition needs its own parentheses.
# (Every row in the sample data has more than 10 wins, so all rows match.)
display(df[(df['team'] == 'RMadrid') | (df['wins'] > 10)])

# Rows of two teams, restricted to three columns. .loc filters rows and
# columns in one step instead of chaining two [] lookups.
fcb_vcf_wins = df.loc[
    (df['team'] == 'FCBarcelona') | (df['team'] == 'ValenciaCF'),
    ['year', 'team', 'wins'],
]
fcb_vcf_wins

#### Built-in Pandas plotting

In [None]:
# Example: bar chart of FC Barcelona's wins per year
barcelona_rows = df.loc[df['team'] == 'FCBarcelona']
barcelona_rows.plot.bar(
    x='year',
    y='wins',
    color=['b', 'r', 'g'],
    title='FC Barcelona Wins from 2010 to 2012',
    xlabel='Year',
    ylabel='Wins',
)

#### Pandas .pivot() method

In [None]:
# Reshape from long to wide: one row per year, one column per team,
# with the number of wins as the cell values
fcb_vcf_wins_pivoted = fcb_vcf_wins.pivot(
    index='year',
    columns='team',
    values='wins',
)