### NumPy and Pandas Refresher
 * Stands for Numerical Python and is the core library for numeric and scientific computing
 * Consists of Single or Multi-dimensional array objects and a collection of routines for processing those arrays
 * Initializing NumPy array with all zeros is sometimes helpful

In [11]:
import numpy as np
#One dimensional Array
n1 = np.array([10,20,30,40])
print(n1)
print(type(n1))

#Two dimensional Array
n2 = np.array([[10,20,30,40], [40,30,20,10]])
print(n2)
print(type(n2))

#Initialize array with zeros
n3 = np.zeros((2,4))
print(n3)

#This will make numpy array with one value
n4 = np.full((1,3), 10)
print(n4)

#A range will do ranges or intervals, note this are not inclusive
n5 = np.arange(10,100, 10)
print(n5)
n6 = np.arange(10,20)
print(n6)

# If I want an array of random values.  This gives ten random values betwen 100 and 200
n7 = np.random.randint(100,200, 10)
print(n7)

[10 20 30 40]
<class 'numpy.ndarray'>
[[10 20 30 40]
 [40 30 20 10]]
<class 'numpy.ndarray'>
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[10 10 10]]
[10 20 30 40 50 60 70 80 90]
[10 11 12 13 14 15 16 17 18 19]
[198 157 154 161 198 117 102 119 191 102]


In [17]:
#Checking the shape of NumPy arrays is very useful to understand your data
n7.shape
print(n2.shape)

#Can also use it to change the shape
n2.shape = (2,4)
print(n2)

(4, 2)
[[10 20 30 40]
 [40 30 20 10]]


In [22]:
#Adding NumPy arrays together
n1 = np.array([10,80])
n2 = np.array([30,10])

#Vertically
print(np.sum([n1,n2]))
print(np.sum([n1,n2], axis = 0))
print(np.sum([n1,n2], axis = 1))

130
[40 90]
[90 40]


In [23]:
#Joining NumPy Arrays
n1 = np.array([1,2,3])
n2 = np.array([30,20,10])

#Vertical Stack
n3 = np.vstack((n1,n2))
print(n3)
#Horizontal Stack
n3 = np.hstack((n1,n2))
print(n3)
#Column Stack
n3 = np.column_stack((n1,n2))
print(n3)

[[ 1  2  3]
 [30 20 10]]
[ 1  2  3 30 20 10]
[[ 1 30]
 [ 2 20]
 [ 3 10]]


### Pandas 
* Stands for Panel Data and is the core library for data manipulation and data analysis 
* Consists of single (series) and multi-dimensional (data-frame) data structures for data-manipulation
* Dataframe is a 2-dimensional labelled Data-Structure

In [31]:
import pandas as pd
s1 = pd.Series([10,20,30,40,50])
print(s1)
print(type(s1))

s2 = pd.Series([10,20,30,40,50], index = ['a','b','c','d','e'])
print(s2)

#Working with dictionary
d1 = {'k1':10, 'k2': 20, 'k3': 30}
d2 = pd.Series(d1)
print(d2)

0    10
1    20
2    30
3    40
4    50
dtype: int64
<class 'pandas.core.series.Series'>
a    10
b    20
c    30
d    40
e    50
dtype: int64
k1    10
k2    20
k3    30
dtype: int64


In [39]:
#Creating a data.frame
df = pd.DataFrame({"College" :['Clemson', 'UNC', 'FSU', 'South Carolina'], "Football Ranking":[1,13,22, 60]})
df

Unnamed: 0,College,Football Ranking
0,Clemson,1
1,UNC,13
2,FSU,22
3,South Carolina,60


In [46]:
#Built in Data Functions
iris = pd.read_csv('C:/Users/locone/Documents/2020/Python/Prac_data/iris.csv')

#Top Rows
print(iris.head(10))

print('  ' )

#Last Rows
print(iris.tail(5))

print(' ')

#Basic Descriptions
iris.describe()


   sepal.length  sepal.width  petal.length  petal.width variety
0           5.1          3.5           1.4          0.2  Setosa
1           4.9          3.0           1.4          0.2  Setosa
2           4.7          3.2           1.3          0.2  Setosa
3           4.6          3.1           1.5          0.2  Setosa
4           5.0          3.6           1.4          0.2  Setosa
5           5.4          3.9           1.7          0.4  Setosa
6           4.6          3.4           1.4          0.3  Setosa
7           5.0          3.4           1.5          0.2  Setosa
8           4.4          2.9           1.4          0.2  Setosa
9           4.9          3.1           1.5          0.1  Setosa
  
     sepal.length  sepal.width  petal.length  petal.width    variety
145           6.7          3.0           5.2          2.3  Virginica
146           6.3          2.5           5.0          1.9  Virginica
147           6.5          3.0           5.2          2.0  Virginica
148           6.2

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


### iLoc & Loc
* iLoc & loc can be used to grab different records
>* iloc: By position with the first argument being rows and second being columns
>* loc Grabs specific columns by column name

In [57]:
#First three rows
iris.iloc[0:3,]

#First five rows and the 3rd and five columns remember Python starts counting at 0
iris.iloc[0:5, 2:4]

#Grabs just columns specified 
iris.loc[80:85,('petal.length', 'sepal.length')]

Unnamed: 0,petal.length,sepal.length
80,3.8,5.5
81,3.7,5.5
82,3.9,5.8
83,5.1,6.0
84,4.5,5.4
85,4.5,6.0


In [67]:
#Other functionality for dataframes
#Boolean Return
iris['sepal.length'] > 5

#This will grab all the data of those that were True
iris[iris['sepal.length']>5]

#Can combine these together.  This sheds light on how you can get to do the data you want
iris[(iris['petal.length'] > 6) & (iris['variety'] == 'Virginica')]

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
105,7.6,3.0,6.6,2.1,Virginica
107,7.3,2.9,6.3,1.8,Virginica
109,7.2,3.6,6.1,2.5,Virginica
117,7.7,3.8,6.7,2.2,Virginica
118,7.7,2.6,6.9,2.3,Virginica
122,7.7,2.8,6.7,2.0,Virginica
130,7.4,2.8,6.1,1.9,Virginica
131,7.9,3.8,6.4,2.0,Virginica
135,7.7,3.0,6.1,2.3,Virginica
