In [2]:
temps = [30,48,23,90]

def mean(seq):
    """Calculate the arithmetic mean of a collection of numbers.

    Args:
        seq: Numbers to average. Sized containers (list, tuple, NumPy
            array, ...) are used as-is; any other iterable, such as a
            generator, is first copied into a list so that it can be both
            summed and counted.

    Returns:
        The sum of the values divided by how many values there are.

    Raises:
        ZeroDivisionError: If ``seq`` contains no values.
    """
    # Generators and other one-shot iterables have no len(), so copy them first.
    values = seq if hasattr(seq, "__len__") else list(seq)
    return sum(values) / len(values)

In [3]:
mean(temps)

47.75

In [4]:
[x for x in temps if x> 50 ]

[90]

In [5]:
[x for x in temps if x > 20 ]

[30, 48, 23, 90]

# NumPy: Adds Python support for large, multi-dimensional arrays and matrices, along with a large library of high-level mathematical functions to operate on these arrays.

In [3]:
import numpy as np

In [10]:
temps_np = np.array(temps)

In [12]:
temps_np

array([30, 48, 23, 90])

In [14]:
from timeit import Timer
from numpy import arange

In [23]:
# Baseline: total seconds for ten runs of summing range(55555) with the built-in sum().
pure = Timer(
    stmt='list_py = range(55555); sum(list_py)',
).timeit(number=10)

In [24]:
# NumPy counterpart: total seconds for ten runs of building arange(55555) and calling .sum().
# The setup string imports arange into the namespace the timed statement runs in.
np_time = Timer(
    stmt='list_np = arange(55555); list_np.sum()',
    setup='from numpy import arange',
).timeit(number=10)

In [27]:
# Speed-up factor: pure-Python time divided by NumPy time (above 1 means NumPy was faster).
# In the recorded run, the NumPy array was much faster than the plain Python version.
pure/np_time

7.215444516616993

In [28]:
## Boolean-mask filtering: index the array with a condition directly (plain Python lists need a comprehension for this)
temps_np[temps_np >55]

array([90])

In [29]:
## Built-in methods of NumPy arrays
temps_np.mean()

47.75

In [30]:
temps_np.sum()

191

In [6]:
# Add-on
# NumPy stands for ‘Numerical Python’ or ‘Numeric Python’.
# It is an open-source Python module that provides fast mathematical computation on arrays and matrices.
# Since arrays and matrices are an essential part of the Machine Learning ecosystem, NumPy, together with modules like Scikit-learn, Pandas, Matplotlib, TensorFlow, etc., completes the Python Machine Learning ecosystem.
# NumPy provides the essential multi-dimensional, array-oriented computing functionality designed for high-level mathematical functions and scientific computation.
# NumPy’s main object is the homogeneous multidimensional array.
# It is a table of elements that all have the same type (homogeneous), e.g. integers, strings or characters; most often numbers.
# In NumPy, dimensions are called axes. The number of axes is called the rank.

ones = np.ones( (3,4), dtype=np.int16 ) 

In [7]:
ones

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int16)

In [8]:
full = np.full( (3,4), 0.11 ) 
full

array([[0.11, 0.11, 0.11, 0.11],
       [0.11, 0.11, 0.11, 0.11],
       [0.11, 0.11, 0.11, 0.11]])

In [11]:
arrange = np.arange( 10, 30, 2 )
arrange

array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28])

In [12]:
linespace = np.linspace(0, 5/3, 6)
linespace

array([0.        , 0.33333333, 0.66666667, 1.        , 1.33333333,
       1.66666667])

In [None]:
# Some of the important attributes and methods of a NumPy array are:

#    ndim: the number of dimensions (axes) of the array
#    shape: a tuple of integers giving the size of the array along each dimension
#    size: the total number of elements in the array
#    dtype: the type of the elements in the array, e.g. int64 or a character/string type
#    itemsize: the size in bytes of each element
#    reshape: a method that returns the array's data arranged in a new shape

# Pandas: Software library written for data manipulation and analysis in Python. Offers data structures and operations for manipulating numerical tables and time series.

# Pandas Series

In [13]:
# Like NumPy, Pandas is one of the most widely used Python libraries in data science.
# It provides high-performance, easy-to-use data structures and data analysis tools.
# Unlike NumPy, which provides objects for multi-dimensional arrays, Pandas provides an in-memory 2D table object called a DataFrame.
# It is like a spreadsheet with column names and row labels.
# Hence, with 2D tables, Pandas can provide many additional features such as creating pivot tables, computing columns based on other columns, and plotting graphs.
import pandas as pd

In [34]:
temp_ser = pd.Series(temps, name='Temprature')

In [35]:
temp_ser

0    30
1    48
2    23
3    90
Name: Temprature, dtype: int64

In [36]:
temp_ser.mean()

47.75

In [37]:
temp_ser.sum()

191

In [38]:
print(dir(temp_ser))

['T', '_AXIS_ALIASES', '_AXIS_IALIASES', '_AXIS_LEN', '_AXIS_NAMES', '_AXIS_NUMBERS', '_AXIS_ORDERS', '_AXIS_REVERSED', '_AXIS_SLICEMAP', '__abs__', '__add__', '__and__', '__array__', '__array_prepare__', '__array_priority__', '__array_wrap__', '__bool__', '__bytes__', '__class__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lt__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pow__', '__radd__', '__rand__', '__rdiv__', '__reduce__', '__reduce_ex__', '__re

# Boolean Array

In [39]:
hot = pd.Series([False, False, True, True])

In [41]:
temp_ser[hot]

2    23
3    90
Name: Temprature, dtype: int64

In [42]:
mask = temp_ser > 55

In [43]:
mask

0    False
1    False
2    False
3     True
Name: Temprature, dtype: bool

In [47]:
mask2 = temp_ser < 90

In [49]:
temp_ser[mask | mask2]

0    30
1    48
2    23
3    90
Name: Temprature, dtype: int64

# Index

In [54]:
temp_ser.index

RangeIndex(start=0, stop=4, step=1)

In [55]:
temp2 = pd.Series(temps, name='Temp2', index=['M','T','W','Th'])

In [56]:
temp2

M     30
T     48
W     23
Th    90
Name: Temp2, dtype: int64

In [58]:
dates = pd.date_range('20180101', periods=4)

In [59]:
temp3 = pd.Series(temps, name='Temp3', index=dates)

In [60]:
temp3

2018-01-01    30
2018-01-02    48
2018-01-03    23
2018-01-04    90
Freq: D, Name: Temp3, dtype: int64

In [61]:
temp3.index

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04'], dtype='datetime64[ns]', freq='D')

In [64]:
# Index repetition: index labels do not have to be unique
temp4 = pd.Series(temps, name='Temp4', index=[0,1,1,3])

In [65]:
temp4

0    30
1    48
1    23
3    90
Name: Temp4, dtype: int64