# 2.3.1 Basic Commands

In [3]:
# imports and setup
import numpy as np
from scipy.stats import pearsonr

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # for 3D plots

import math

import pandas as pd

%matplotlib inline
pd.set_option('display.precision', 2) # number precision for pandas
plt.style.use('seaborn') # pretty matplotlib plots

In [None]:
# build two small integer vectors and report how many entries each holds
x, y = np.array([1, 6, 2]), np.array([1, 4, 3])
(x.shape[0], y.shape[0])

In [None]:
# element-wise sum of the two vectors
np.add(x, y)

In [None]:
# 2x2 matrix holding 1..4, filled column by column (column-major order)
x = np.asmatrix(np.reshape(np.arange(1, 5), (2, 2), order='F'))
x

In [None]:
# matrix operations: square every entry (element-wise, not a matrix product)
np.square(x)

In [None]:
# random normal distribution & correlation:
# x is standard normal; y is x shifted by tight noise centred on 50,
# so the two samples are almost perfectly correlated
n_obs = 50
x = np.random.normal(size=n_obs)
y = np.random.normal(loc=50, scale=.1, size=n_obs) + x
# pearsonr yields (coefficient, p-value); only the coefficient is displayed
corr, _p_value = pearsonr(x, y)
corr

In [None]:
# fix the global NumPy seed so the sample below is reproducible, then show
# mean, variance, sqrt(variance) and standard deviation side by side
np.random.seed(3)
y = np.random.normal(size=100)
(np.mean(y), np.var(y), np.sqrt(np.var(y)), np.std(y))

# 2.3.2 Graphics

In [None]:
# two independent standard-normal samples to plot against each other
x = np.random.normal(size=100)
y = np.random.normal(size=100)

# seaborn scatterplot with marginal histograms.
# The vectors are passed by keyword: seaborn >= 0.12 treats the first
# positional argument as `data` and no longer accepts x/y positionally.
p = sns.jointplot(x=x, y=y, kind='scatter')
p.set_axis_labels(xlabel='x axis', ylabel='y axis');

In [None]:
# the integers 1 through 10: a zero-based range shifted up by one
x = np.arange(10) + 1
x

In [None]:
# 50 evenly spaced points covering [-pi, pi], both end points included
x = np.linspace(start=-np.pi, stop=np.pi, num=50)
x

In [None]:
# the same 50-point grid on [-pi, pi] is used for both axes
x = np.linspace(-np.pi, np.pi, num=50)
y = x

# simulating R outer function
def pf(a, b):
    """Evaluate cos(b) / (1 + a**2).

    Parameters
    ----------
    a, b : scalar or array-like
        Inputs; arrays are combined using NumPy broadcasting.

    Returns
    -------
    NumPy scalar or ndarray
        The value(s) of cos(b) / (1 + a**2).
    """
    return np.cos(b) / (1 + np.square(a))

# A column vector of x values against a row vector of y values broadcasts
# to the full grid in one call, replacing the element-by-element Python
# loop. The layout is unchanged: f[i, j] == pf(x[i], y[j]).
f = pf(x[:, np.newaxis], y[np.newaxis, :])

        
# contour plot
# f is indexed as f[i, j] = pf(x[i], y[j]) (rows follow x), whereas
# matplotlib expects Z with one row per y value and one column per x value.
# Transposing puts every value at its own (x, y) position.
cp = plt.contour(x, y, f.T, 45, cmap='viridis')
plt.clabel(cp, inline=1, fontsize=10);

In [None]:
# contour 2
# antisymmetric part of f, so fa[i, j] == -fa[j, i]
fa = (f - f.transpose())/2
# fa[i, j] belongs to the point (x[i], y[j]), but matplotlib reads Z as
# [row = y, column = x]; without the transpose the plot shows -fa.
cp = plt.contour(x, y, fa.T, 15, cmap='viridis')
plt.clabel(cp, inline=1, fontsize=10);

In [None]:
# heatmap (filled contours) with a colour bar
# fa[i, j] belongs to the point (x[i], y[j]), but matplotlib reads Z as
# [row = y, column = x]; transpose so each value is drawn at its own
# coordinates.
cp = plt.contourf(x, y, fa.T, 15, cmap='viridis')
plt.clabel(cp, inline=1, fontsize=10)
plt.colorbar();

In [None]:
# 3d perspective
# plot_wireframe broadcasts X, Y and Z to a common 2-D shape. Passing the
# 1-D x and y directly makes both vary along the same axis, which collapses
# the surface onto the plane x == y. Build explicit coordinate grids instead:
# x_grid[r, c] == x[c] and y_grid[r, c] == y[r].
x_grid, y_grid = np.meshgrid(x, y)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# fa is indexed [x, y]; the grids are indexed [y, x], hence the transpose
ax.plot_wireframe(x_grid, y_grid, fa.T, cmap='viridis')
# camera position: 30 degrees elevation, 100 degrees azimuth
ax.view_init(30, 100);

# 2.3.3 Indexing Data

In [None]:
# 4x4 matrix of 1..16 filled column by column, like R's matrix(1:16, 4, 4)
A = np.asmatrix(np.reshape(np.arange(1, 17), (4, 4), order='F'))
A

In [None]:
# single element by zero-based (row, column) position:
# index 1 is the second row, index 2 the third column
A[1, 2]

In [None]:
# pick rows 0 and 2 crossed with columns 1 and 3; np.ix_ expands the two
# index lists into the open mesh that fancy indexing needs for a 2x2 block
A[np.ix_([0, 2], [1, 3])]

In [None]:
# select a range of rows and columns:
# rows 0-2 and columns 1-3 (the end of a slice is exclusive)
A[0:3, 1:4]

In [None]:
# select a range of rows and all columns:
# rows 0-1; the bare ':' keeps every column
A[0:2,:]

In [None]:
# select all rows and a range of columns:
# the bare ':' keeps every row; columns 0-1
A[:,0:2]

In [None]:
# shape of the matrix as a (rows, columns) tuple
A.shape

# 2.3.4 Loading Data

In [None]:
# read csv data with pandas into a dataframe; na_values=['?'] makes pandas
# treat '?' entries as missing values.
# pandas read_xxx functions infer column datatypes and the header row
# without explicit declarations
Auto = pd.read_csv('../datasets/Auto.csv', na_values=['?'])
Auto

In [None]:
# (rows, columns) of the dataframe
Auto.shape

In [None]:
# dropping rows (axis-0) where there are NA values.
# The cleaned frame is rebound to the same name instead of using
# inplace=True, which pandas discourages: it has no performance benefit
# and it prevents method chaining.
Auto = Auto.dropna(axis=0)
Auto.shape

In [None]:
# column labels of the dataframe as a plain Python list
Auto.columns.tolist()

In [None]:
# scatterplot of mpg against cylinders with marginal distributions
pl = sns.jointplot(data=Auto, x='cylinders', y='mpg');

In [None]:
# convert the cylinders column to pandas' categorical dtype, then write it
# back over the original column
cylinders_as_category = Auto['cylinders'].astype('category')
Auto['cylinders'] = cylinders_as_category
Auto

In [None]:
# one mpg box per cylinder count
sns.boxplot(data=Auto, x='cylinders', y='mpg');

In [None]:
# seaborn enhanced histogram with density plot
# distplot is deprecated since seaborn 0.11 and scheduled for removal.
# histplot with density-normalised bars and a KDE overlay is the replacement
# (cut=3 extends the KDE past the data range, as distplot did).
# Older seaborn without histplot falls back to the original call.
if hasattr(sns, 'histplot'):
    sns.histplot(Auto['mpg'], bins=15, kde=True, stat='density', kde_kws={'cut': 3})
else:
    sns.distplot(Auto['mpg'], bins=15)

In [None]:
# scatterplot matrix of the continuous variables; points are coloured by
# cylinder count
sns.pairplot(
    Auto,
    hue='cylinders',
    vars=['mpg', 'displacement', 'horsepower', 'weight', 'acceleration'],
);

In [None]:
# summary statistics for all dataframe columns; include='all' adds the
# non-numerical columns to the summary as well
Auto.describe(include='all')

In [None]:
# summary statistics for the mpg column only; to_frame() turns the
# resulting Series into a one-column dataframe so jupyter renders a table
Auto['mpg'].describe().to_frame()