In [None]:
# Numerical programming with NumPy

import numpy as np  # array library used by the numerical cells

# 100 evenly spaced sample points from -pi to +pi (both ends included)
a = np.linspace(start=-np.pi, stop=np.pi, num=100)

# element-wise cosine and sine of the grid
b, c = np.cos(a), np.sin(a)

# inner product of the two sampled curves
print(b @ c)

In [None]:
# Integrating the standard normal density with SciPy

from scipy.stats import norm
from scipy.integrate import quad

phi = norm()  # frozen normal distribution with default parameters

# quad returns a pair: the integral estimate and an error estimate
value, error = quad(func=phi.pdf, a=-2, b=2)
value

In [None]:
# Matrix inversion with scipy.linalg

from scipy import linalg

a = np.array([[1., 2.], [3., 4.]])

# inverse of matrix a (computed once, reused below)
a_inv = linalg.inv(a)
print(a_inv)

# product of matrix a and its inverse
print(np.dot(a, a_inv))

In [None]:
# using scipy to solve the linear equation set a * x = b for the unknown x

a = np.array([[3, 2, 0], [1, -1, 0], [0, 5, 1]])
b = np.array([2, 4, -1])
x = linalg.solve(a, b)

print(x)

# x is a floating-point result, so check a @ x against b with a tolerance;
# an exact `==` can report False purely because of round-off.
print(np.isclose(np.dot(a, x), b))

In [None]:
# Numerical integration of x**2 with scipy.integrate

from scipy import integrate


def x2(x):
    """Return x squared."""
    return x**2


# integral of x**2 from 0 to 4 (quad returns the estimate and an error estimate)
print(integrate.quad(x2, 0, 4))

# analytical result of the integral above
print(4**3 / 3.)

In [None]:
# Integrating exp(-x) over [0, inf) and an integrand with an extra parameter


def invexp(x):
    """Return exp(-x)."""
    return np.exp(-x)


print(integrate.quad(invexp, 0, np.inf))


def f(x, a):
    """Return a * x; `a` is passed in through quad's `args` tuple."""
    return a * x


# integrate a*x over [0, 1] for a = 1 and then a = 3
for slope in (1, 3):
    y, err = integrate.quad(f, 0, 1, args=(slope,))
    print(y)



In [None]:
# solving initial value problem 

from scipy.integrate import solve_ivp
import matplotlib.pyplot as plt

# right-hand side of the ODE dy/dt = -0.5 * y
def exponential_decay(t, y):
    """Return the derivative -0.5 * y; `t` is accepted but not used."""
    rate = -0.5
    return rate * y

# integrate over t in [0, 10] starting from the three initial values 2, 4 and 8
sol = solve_ivp(fun=exponential_decay, t_span=[0, 10], y0=[2, 4, 8])

print(sol.t)
print(sol.y)

In [None]:
# statistics usage in scipy.stats

from scipy import stats
from scipy.stats import norm

print('bounds of distribtuion lower: %s, upper: %s' % (norm.a, norm.b))

print(norm.cdf(0))
print(norm.cdf([-1., 0, 1.0]))
print(norm.mean(), norm.std(), norm.var())
print(norm.stats(moments="mv"))

# to find the median of a distribution, we can use the percent point
# function ppf, which is the inverse of the cdf
print(norm.ppf(0.5))

np.random.seed(1234)
print(norm.rvs(size=5, random_state=1234))

# Gotcha: a bare positional 5 is NOT a sample count. The first positional
# argument of rvs is the location, so this draws ONE value from a normal
# distribution centred at 5. Spelled out as loc=5 to make that visible.
print(norm.rvs(loc=5))

In [None]:
# simple plots using matplotlib

import matplotlib.pyplot as plt

# Each example is drawn on its own figure. Drawing them all on one axes
# would apply the fixed limits [0, 6, 0, 20] to every curve and clip t**3.

# 1) y-values only
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4])
ax.set_ylabel('some numbers')

# 2) red circle markers with explicit axis limits [xmin, xmax, ymin, ymax]
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4], [1, 4, 9, 16], 'ro')
ax.axis([0, 6, 0, 20])

# 3) evenly sampled time at 200ms intervals
t = np.arange(0., 5., 0.2)

# red dashes, blue squares and green triangles
fig, ax = plt.subplots()
ax.plot(t, t, 'r--', t, t**2, 'bs', t, t**3, 'g^')

plt.show()




In [None]:
# working with multiple figures and axes

def f(t):
    """Return exp(-t) * cos(2*pi*t), a cosine with an exponentially decaying amplitude."""
    envelope = np.exp(-t)
    oscillation = np.cos(2 * np.pi * t)
    return envelope * oscillation

# coarse grid (step 0.1) for the markers, fine grid (step 0.02) for the lines
t1 = np.arange(0.0, 5.0, 0.1)
t2 = np.arange(0.0, 5.0, 0.02)

fig1 = plt.figure(1)

# upper panel: f sampled on both grids (blue circles and a black line)
upper = fig1.add_subplot(2, 1, 1)
upper.plot(t1, f(t1), 'bo', t2, f(t2), 'k')

# lower panel: the plain cosine as a red dashed line
lower = fig1.add_subplot(2, 1, 2)
lower.plot(t2, np.cos(2*np.pi*t2), 'r--')
plt.show()

In [None]:
import os

from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap

output_file("gmap.html")

map_options = GMapOptions(lat=30.2861, lng=-97.7394, map_type="roadmap", zoom=11)

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is read from the GOOGLE_API_KEY environment variable so that it is
# never hard-coded in the notebook; the literal placeholder is used when the
# variable is not set.
api_key = os.environ.get("GOOGLE_API_KEY", "GOOGLE_API_KEY")

# Create the map figure. Without this assignment `p` is undefined and the
# `p.circle(...)` call below raises NameError.
p = gmap(api_key, map_options, title="Austin")

source = ColumnDataSource(
    data=dict(lat=[ 30.29,  30.20,  30.29],
              lon=[-97.70, -97.74, -97.78])
)

p.circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, source=source)

show(p)

In [7]:
# Line chart of the GOOG column with plotly.express
import plotly.express as px

df = px.data.stocks()  # example stocks table shipped with plotly
fig = px.line(data_frame=df, x='date', y="GOOG")
fig.show()

In [8]:
import plotly.express as px

# load the stocks table in its indexed form and subtract 1 from every value
df = px.data.stocks(indexed=True) - 1

# one area panel per company, wrapped after two columns
fig = px.area(data_frame=df, facet_col="company", facet_col_wrap=2)
fig.show()

In [92]:
# Symbolic algebra with SymPy

from sympy import Symbol

# Treat 'x' and 'y' as algebraic symbols
x = Symbol('x')
y = Symbol('y')
x + x + x + y  # not the last expression in the cell, so the notebook does not display it

# square of a sum, then multiplied out
expression = (x + y)**2
expression.expand()

x**2 + 2*x*y + y**2

In [93]:
# Using pandas

import pandas as pd
np.random.seed(1234)

data = np.random.randn(5, 2)  # 5x2 matrix of N(0,1) random draws

# ISO 8601 start date: '20/02/2021' only parses as 20 February because 20
# cannot be a month, and day-first strings like that are ambiguous in general.
dates = pd.date_range('2021-02-20', periods=5)

df = pd.DataFrame(data, columns=['price', 'weight'], index=dates)
print(df)
print("\nMean \n", df.mean())

               price    weight
2021-02-20  0.471435 -1.190976
2021-02-21  1.432707 -0.312652
2021-02-22 -0.720589  0.887163
2021-02-23  0.859588 -0.636524
2021-02-24  0.015696 -2.242685

Mean 
 price     0.411768
weight   -0.699135
dtype: float64


In [94]:
# OLS regression with the statsmodels formula interface
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Load the "Guerry" dataset from the "HistData" collection
dat = sm.datasets.get_rdataset("Guerry", "HistData").data

# Build the regression model (using the natural log of one of the regressors),
# then fit it in a separate step
model = smf.ols(formula='Lottery ~ Literacy + np.log(Pop1831)', data=dat)
results = model.fit()

print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                Lottery   R-squared:                       0.348
Model:                            OLS   Adj. R-squared:                  0.333
Method:                 Least Squares   F-statistic:                     22.20
Date:                Sat, 20 Feb 2021   Prob (F-statistic):           1.90e-08
Time:                        19:25:21   Log-Likelihood:                -379.82
No. Observations:                  86   AIC:                             765.6
Df Residuals:                      83   BIC:                             773.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept         246.4341     35.233     

In [100]:
# using scikit-learn for linear regression

from sklearn import linear_model

reg = linear_model.LinearRegression()

# Fit once: three samples with two features each, targets 0, 1, 2.
# (A second identical fit call only repeated the same work.)
reg.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
print(reg.coef_)


[0.5 0.5]


In [9]:
# Using PyStan with a hierarchical model used to study coaching effects across eight schools.
# NOTE(review): the recorded output of this cell is
# "AttributeError: module 'stan' has no attribute 'build'", so the `stan`
# module that was imported presumably is not the package this code expects —
# verify the installed package/version.

import stan

schools_code = """
data {
  int<lower=0> J;         // number of schools
  real y[J];              // estimated treatment effects
  real<lower=0> sigma[J]; // standard error of effect estimates
}
parameters {
  real mu;                // population treatment effect
  real<lower=0> tau;      // standard deviation in treatment effects
  vector[J] eta;          // unscaled deviation from mu by school
}
transformed parameters {
  vector[J] theta = mu + tau * eta;        // school treatment effects
}
model {
  target += normal_lpdf(eta | 0, 1);       // prior log-density
  target += normal_lpdf(y | theta, sigma); // log-likelihood
}
"""

# Input data for the model: J values of y and sigma, one per school
schools_data = {"J": 8,
                "y": [28,  8, -3,  7, -1,  1, 18, 12],
                "sigma": [15, 10, 16, 11,  9, 11, 10, 18]}

posterior = stan.build(schools_code, data=schools_data)
fit = posterior.sample(num_chains=4, num_samples=1000)
eta = fit["eta"]  # array with shape (8, 4000)
df = fit.to_frame()  # pandas `DataFrame`, requires pandas

AttributeError: module 'stan' has no attribute 'build'

In [8]:
# using PyMC to model disaster

# Names spelled as the library exports them: `Poisson` (not "Possion") and
# `Deterministic` (not "determinister").
# NOTE(review): `Deterministic` is the name in current PyMC releases; the
# legacy PyMC 2 API used a lowercase `deterministic` decorator instead —
# confirm which PyMC version this notebook targets.
from pymc import DiscreteUniform, Exponential, Deterministic, Poisson, Uniform


ModuleNotFoundError: No module named 'pymc'

In [4]:
import theano
from theano import tensor

# Two symbolic floating-point scalars
a, b = tensor.dscalar(), tensor.dscalar()

# Symbolic expression for their sum
c = a + b

# Turn the expression into a callable that takes values for (a, b)
# and computes the corresponding value of c
f = theano.function(inputs=[a, b], outputs=c)

# Evaluate with a = 1.5 and b = 2.5 and check the result
assert f(1.5, 2.5) == 4.0

ModuleNotFoundError: No module named 'theano'