In [1]:
# Pandas

In [2]:
import pandas as pd

In [3]:
# Series

In [4]:
S = pd.Series([9, -1, 2, 13])

In [5]:
print(S)

0     9
1    -1
2     2
3    13
dtype: int64


In [6]:
S[0]

9

In [8]:
S[2:4]

2     2
3    13
dtype: int64

In [9]:
S.values

array([ 9, -1,  2, 13])

In [10]:
S.index

RangeIndex(start=0, stop=4, step=1)

In [11]:
S.dtypes

dtype('int64')

In [12]:
S2 = pd.Series([9, -1, 2, 13], index = ['first', 'second', 'third', 'fourth'])

In [13]:
print(S2)

first      9
second    -1
third      2
fourth    13
dtype: int64


In [14]:
S2['first']

9

In [15]:
S2['first':'third']

first     9
second   -1
third     2
dtype: int64

In [16]:
# Dictionary and Series

In [17]:
D = {'a':2000, 'b':4000, 'c':1500, 'd':7000}

In [18]:
S3 = pd.Series(D)

In [19]:
print(S3)

a    2000
b    4000
c    1500
d    7000
dtype: int64


In [20]:
# Data Frame

In [21]:
data = {'name':['John', 'Jane', 'Tom', 'Rick', 'Sara'], 'year':[2013, 2017, 2015, 2016, 2015], 'points':[2.5, 3.2, 2.4, 7.1, 1.1]}

In [22]:
df = pd.DataFrame(data)

In [23]:
print(df)

   name  year  points
0  John  2013     2.5
1  Jane  2017     3.2
2   Tom  2015     2.4
3  Rick  2016     7.1
4  Sara  2015     1.1


In [24]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [25]:
df.columns

Index(['name', 'year', 'points'], dtype='object')

In [26]:
df.values

array([['John', 2013, 2.5],
       ['Jane', 2017, 3.2],
       ['Tom', 2015, 2.4],
       ['Rick', 2016, 7.1],
       ['Sara', 2015, 1.1]], dtype=object)

In [28]:
df2 = pd.DataFrame(data, columns = ['year', 'name', 'points', 'penalty'], index =['one', 'two', 'three', 'four', 'five'])

In [29]:
print(df2)

       year  name  points penalty
one    2013  John     2.5     NaN
two    2017  Jane     3.2     NaN
three  2015   Tom     2.4     NaN
four   2016  Rick     7.1     NaN
five   2015  Sara     1.1     NaN


In [30]:
df2.describe()

Unnamed: 0,year,points
count,5.0,5.0
mean,2015.2,3.26
std,1.48324,2.27662
min,2013.0,1.1
25%,2015.0,2.4
50%,2015.0,2.5
75%,2016.0,3.2
max,2017.0,7.1


In [31]:
df2['year']

one      2013
two      2017
three    2015
four     2016
five     2015
Name: year, dtype: int64

In [32]:
df2.year

one      2013
two      2017
three    2015
four     2016
five     2015
Name: year, dtype: int64

In [33]:
df2[['year', 'points']]

Unnamed: 0,year,points
one,2013,2.5
two,2017,3.2
three,2015,2.4
four,2016,7.1
five,2015,1.1


In [34]:
df2['penalty'] = 0.5

In [35]:
print(df2)

       year  name  points  penalty
one    2013  John     2.5      0.5
two    2017  Jane     3.2      0.5
three  2015   Tom     2.4      0.5
four   2016  Rick     7.1      0.5
five   2015  Sara     1.1      0.5


In [36]:
df2['penalty'] = [0.3, 0.5, 0.2, 0.1, 0.7]

In [37]:
print(df2)

       year  name  points  penalty
one    2013  John     2.5      0.3
two    2017  Jane     3.2      0.5
three  2015   Tom     2.4      0.2
four   2016  Rick     7.1      0.1
five   2015  Sara     1.1      0.7


In [38]:
val = pd.Series([-1.3, -2.2, -0.9], index=['two', 'four', 'five'])

In [39]:
df2['debt'] = val

In [40]:
print(df2)

       year  name  points  penalty  debt
one    2013  John     2.5      0.3   NaN
two    2017  Jane     3.2      0.5  -1.3
three  2015   Tom     2.4      0.2   NaN
four   2016  Rick     7.1      0.1  -2.2
five   2015  Sara     1.1      0.7  -0.9


In [41]:
df2['net_points'] = df2['points'] - df2['penalty']

In [42]:
print(df2)

       year  name  points  penalty  debt  net_points
one    2013  John     2.5      0.3   NaN         2.2
two    2017  Jane     3.2      0.5  -1.3         2.7
three  2015   Tom     2.4      0.2   NaN         2.2
four   2016  Rick     7.1      0.1  -2.2         7.0
five   2015  Sara     1.1      0.7  -0.9         0.4


In [43]:
df2['high_points'] = df2['net_points'] > 2.0

In [44]:
print(df2)

       year  name  points  penalty  debt  net_points  high_points
one    2013  John     2.5      0.3   NaN         2.2         True
two    2017  Jane     3.2      0.5  -1.3         2.7         True
three  2015   Tom     2.4      0.2   NaN         2.2         True
four   2016  Rick     7.1      0.1  -2.2         7.0         True
five   2015  Sara     1.1      0.7  -0.9         0.4        False


In [45]:
del df2['debt']

In [46]:
print(df2)

       year  name  points  penalty  net_points  high_points
one    2013  John     2.5      0.3         2.2         True
two    2017  Jane     3.2      0.5         2.7         True
three  2015   Tom     2.4      0.2         2.2         True
four   2016  Rick     7.1      0.1         7.0         True
five   2015  Sara     1.1      0.7         0.4        False


In [47]:
# row operation

In [48]:
df2[0:3]

Unnamed: 0,year,name,points,penalty,net_points,high_points
one,2013,John,2.5,0.3,2.2,True
two,2017,Jane,3.2,0.5,2.7,True
three,2015,Tom,2.4,0.2,2.2,True


In [49]:
df2.loc['two']

year           2017
name           Jane
points          3.2
penalty         0.5
net_points      2.7
high_points    True
Name: two, dtype: object

In [50]:
df2.loc['two':'four']

Unnamed: 0,year,name,points,penalty,net_points,high_points
two,2017,Jane,3.2,0.5,2.7,True
three,2015,Tom,2.4,0.2,2.2,True
four,2016,Rick,7.1,0.1,7.0,True


In [51]:
df2.loc['two':'four', 'year']

two      2017
three    2015
four     2016
Name: year, dtype: int64

In [52]:
df2.loc['two':'four', ['year', 'penalty']]

Unnamed: 0,year,penalty
two,2017,0.5
three,2015,0.2
four,2016,0.1


In [53]:
df2.loc['six', :] = [2013, 'Mary', 3.1, 0.7, 2.4, True]

In [54]:
print(df2)

         year  name  points  penalty  net_points  high_points
one    2013.0  John     2.5      0.3         2.2         True
two    2017.0  Jane     3.2      0.5         2.7         True
three  2015.0   Tom     2.4      0.2         2.2         True
four   2016.0  Rick     7.1      0.1         7.0         True
five   2015.0  Sara     1.1      0.7         0.4        False
six    2013.0  Mary     3.1      0.7         2.4         True


In [55]:
df2.iloc[3]

year           2016
name           Rick
points          7.1
penalty         0.1
net_points        7
high_points    True
Name: four, dtype: object

In [56]:
df2.iloc[3:5, 1:3]

Unnamed: 0,name,points
four,Rick,7.1
five,Sara,1.1


In [57]:
df2.iloc[[0, 1, 3], [2,4]]

Unnamed: 0,points,net_points
one,2.5,2.2
two,3.2,2.7
four,7.1,7.0


In [58]:
df2.loc[df2['year'] > 2013, :]

Unnamed: 0,year,name,points,penalty,net_points,high_points
two,2017.0,Jane,3.2,0.5,2.7,True
three,2015.0,Tom,2.4,0.2,2.2,True
four,2016.0,Rick,7.1,0.1,7.0,True
five,2015.0,Sara,1.1,0.7,0.4,False


In [59]:
df2.loc[df2['year']==2015, ['name', 'points']]

Unnamed: 0,name,points
three,Tom,2.4
five,Sara,1.1


In [60]:
df2.loc[df2['points']>3, 'penalty'] = 0

In [61]:
print(df2)

         year  name  points  penalty  net_points  high_points
one    2013.0  John     2.5      0.3         2.2         True
two    2017.0  Jane     3.2      0.0         2.7         True
three  2015.0   Tom     2.4      0.2         2.2         True
four   2016.0  Rick     7.1      0.0         7.0         True
five   2015.0  Sara     1.1      0.7         0.4        False
six    2013.0  Mary     3.1      0.0         2.4         True


In [62]:
df2.drop('two')

Unnamed: 0,year,name,points,penalty,net_points,high_points
one,2013.0,John,2.5,0.3,2.2,True
three,2015.0,Tom,2.4,0.2,2.2,True
four,2016.0,Rick,7.1,0.0,7.0,True
five,2015.0,Sara,1.1,0.7,0.4,False
six,2013.0,Mary,3.1,0.0,2.4,True


In [63]:
df2.sum(axis = 0)

year                             12089
name           JohnJaneTomRickSaraMary
points                            19.4
penalty                            1.2
net_points                        16.9
high_points                          5
dtype: object

In [64]:
# 'name' holds strings, so restrict the reduction to numeric/bool columns.
# Since pandas 2.0 the default (numeric_only=False) raises TypeError here
# instead of silently skipping non-numeric columns.
df2.mean(axis=0, numeric_only=True)

year           2014.833333
points            3.233333
penalty           0.200000
net_points        2.816667
high_points       0.833333
dtype: float64

In [65]:
# Row-wise totals over numeric/bool columns only: without numeric_only=True
# current pandas tries to add the string column 'name' to the numbers and
# raises TypeError.
df2.sum(axis=1, numeric_only=True)

one      2019.0
two      2023.9
three    2020.8
four     2031.1
five     2017.2
six      2019.5
dtype: float64

In [66]:
# read csv file

In [67]:
tips = pd.read_csv('tips.csv')

In [68]:
print(tips)

     total_bill   tip smoker   day    time  size
0         16.99  1.01     No   Sun  Dinner     2
1         10.34  1.66     No   Sun  Dinner     3
2         21.01  3.50     No   Sun  Dinner     3
3         23.68  3.31     No   Sun  Dinner     2
4         24.59  3.61     No   Sun  Dinner     4
..          ...   ...    ...   ...     ...   ...
239       29.03  5.92     No   Sat  Dinner     3
240       27.18  2.00    Yes   Sat  Dinner     2
241       22.67  2.00    Yes   Sat  Dinner     2
242       17.82  1.75     No   Sat  Dinner     2
243       18.78  3.00     No  Thur  Dinner     2

[244 rows x 6 columns]


In [69]:
tips['tip_rate'] = tips['tip']/tips['total_bill']*100

In [70]:
print(tips)

     total_bill   tip smoker   day    time  size   tip_rate
0         16.99  1.01     No   Sun  Dinner     2   5.944673
1         10.34  1.66     No   Sun  Dinner     3  16.054159
2         21.01  3.50     No   Sun  Dinner     3  16.658734
3         23.68  3.31     No   Sun  Dinner     2  13.978041
4         24.59  3.61     No   Sun  Dinner     4  14.680765
..          ...   ...    ...   ...     ...   ...        ...
239       29.03  5.92     No   Sat  Dinner     3  20.392697
240       27.18  2.00    Yes   Sat  Dinner     2   7.358352
241       22.67  2.00    Yes   Sat  Dinner     2   8.822232
242       17.82  1.75     No   Sat  Dinner     2   9.820426
243       18.78  3.00     No  Thur  Dinner     2  15.974441

[244 rows x 7 columns]


In [71]:
groupedbysmoke = tips.groupby('smoker')

In [72]:
groupedbysmoke['tip_rate'].agg('mean')

smoker
No     15.932846
Yes    16.319604
Name: tip_rate, dtype: float64

In [73]:
groupedbytime_day = tips.groupby(['day', 'time'])

In [74]:
groupedbytime_day['tip_rate'].agg('mean')

day   time  
Fri   Dinner    15.891611
      Lunch     18.876489
Sat   Dinner    15.315172
Sun   Dinner    16.689729
Thur  Dinner    15.974441
      Lunch     16.130074
Name: tip_rate, dtype: float64

In [75]:
# Bubble Sort

In [77]:
def BubbleSort(L):
    """Sort ``L`` in place in ascending order using bubble sort.

    Performs ``len(L)`` passes. Each pass walks the still-unsorted prefix and
    swaps adjacent elements that are out of order, so the largest remaining
    element ends up at the end of that prefix. The list is printed after
    every pass.

    Args:
        L: Mutable sequence of mutually comparable items.

    Returns:
        The same object ``L``, now sorted.
    """
    length = len(L)
    for completed in range(length):
        # Everything from `boundary + 1` onward is already in final position.
        boundary = length - completed - 1
        pos = 0
        while pos < boundary:
            here, there = L[pos], L[pos + 1]
            if here > there:
                L[pos] = there
                L[pos + 1] = here
            pos += 1
        print(L)
    return L

In [78]:
L = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]

In [79]:
BubbleSort(L)

[3, 38, 5, 44, 15, 36, 26, 27, 2, 46, 4, 19, 47, 48, 50]
[3, 5, 38, 15, 36, 26, 27, 2, 44, 4, 19, 46, 47, 48, 50]
[3, 5, 15, 36, 26, 27, 2, 38, 4, 19, 44, 46, 47, 48, 50]
[3, 5, 15, 26, 27, 2, 36, 4, 19, 38, 44, 46, 47, 48, 50]
[3, 5, 15, 26, 2, 27, 4, 19, 36, 38, 44, 46, 47, 48, 50]
[3, 5, 15, 2, 26, 4, 19, 27, 36, 38, 44, 46, 47, 48, 50]
[3, 5, 2, 15, 4, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[3, 2, 5, 4, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]


[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]

In [80]:
# Selection Sort

In [81]:
def SelectionSort(L):
    """Sort ``L`` in place in ascending order using selection sort.

    For each slot from left to right, the smallest element of the remaining
    suffix is located and swapped into that slot. The list is printed after
    every slot has been filled.

    Args:
        L: Mutable sequence of mutually comparable items.

    Returns:
        The same object ``L``, now sorted.
    """
    total = len(L)
    for slot in range(total):
        smallest = slot
        candidate = slot + 1
        # Scan the unsorted suffix for the index of its minimum.
        while candidate < total:
            if L[smallest] > L[candidate]:
                smallest = candidate
            candidate += 1
        L[slot], L[smallest] = L[smallest], L[slot]
        print(L)
    return L

In [82]:
L = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]

In [83]:
SelectionSort(L)

[2, 44, 38, 5, 47, 15, 36, 26, 27, 3, 46, 4, 19, 50, 48]
[2, 3, 38, 5, 47, 15, 36, 26, 27, 44, 46, 4, 19, 50, 48]
[2, 3, 4, 5, 47, 15, 36, 26, 27, 44, 46, 38, 19, 50, 48]
[2, 3, 4, 5, 47, 15, 36, 26, 27, 44, 46, 38, 19, 50, 48]
[2, 3, 4, 5, 15, 47, 36, 26, 27, 44, 46, 38, 19, 50, 48]
[2, 3, 4, 5, 15, 19, 36, 26, 27, 44, 46, 38, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 36, 27, 44, 46, 38, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 44, 46, 38, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 44, 46, 38, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 46, 44, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]


[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]

In [84]:
# Insertion Sort

In [85]:
def InsertionSort(L):
    """Sort ``L`` in place in ascending order using insertion sort.

    Starting from the second element, each element is lifted out, larger
    elements to its left are shifted one position right, and the element is
    dropped into the gap. The list is printed after every insertion.

    Args:
        L: Mutable sequence of mutually comparable items.

    Returns:
        The same object ``L``, now sorted.
    """
    for pos in range(1, len(L)):
        current = L[pos]
        hole = pos
        # Slide the gap left while the left neighbour is larger than `current`.
        while hole > 0 and current < L[hole - 1]:
            L[hole] = L[hole - 1]
            hole -= 1
        L[hole] = current
        print(L)
    return L

In [86]:
L = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]

In [87]:
InsertionSort(L)

[3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
[3, 38, 44, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
[3, 5, 38, 44, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
[3, 5, 38, 44, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]
[3, 5, 15, 38, 44, 47, 36, 26, 27, 2, 46, 4, 19, 50, 48]
[3, 5, 15, 36, 38, 44, 47, 26, 27, 2, 46, 4, 19, 50, 48]
[3, 5, 15, 26, 36, 38, 44, 47, 27, 2, 46, 4, 19, 50, 48]
[3, 5, 15, 26, 27, 36, 38, 44, 47, 2, 46, 4, 19, 50, 48]
[2, 3, 5, 15, 26, 27, 36, 38, 44, 47, 46, 4, 19, 50, 48]
[2, 3, 5, 15, 26, 27, 36, 38, 44, 46, 47, 4, 19, 50, 48]
[2, 3, 4, 5, 15, 26, 27, 36, 38, 44, 46, 47, 19, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 50, 48]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]


[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]

In [88]:
# Merge Sort

In [89]:
def MergeSort(L):
    """Sort the list ``L`` in place with a top-down merge sort and return it.

    The list is split at its midpoint, each half (a slice copy) is sorted
    recursively, and the two sorted halves are merged back into ``L``. After
    each merge the merged (sub)list is printed. Lists with fewer than two
    elements are left untouched and print nothing.

    The merge is stable: when two elements compare equal, the one from the
    left half is written first, so equal elements keep their original
    relative order.

    Args:
        L: List of mutually comparable items.

    Returns:
        The same object ``L``, now sorted ascending (returned for parity with
        the other in-place sorts in this notebook).
    """
    if len(L) > 1:
        mid = len(L) // 2
        left = L[:mid]
        right = L[mid:]

        MergeSort(left)
        MergeSort(right)

        i = j = k = 0

        while i < len(left) and j < len(right):
            # Take from the right half only when it is strictly smaller;
            # ties go to the left half, which keeps the sort stable.
            if right[j] < left[i]:
                L[k] = right[j]
                j += 1
            else:
                L[k] = left[i]
                i += 1
            k += 1

        # Exactly one half still has elements; the leftover count equals the
        # number of unfilled slots, so this slice assignment keeps len(L).
        L[k:] = left[i:] + right[j:]

        print(L)
    return L

In [90]:
L = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]

In [91]:
MergeSort(L)

[38, 44]
[3, 38, 44]
[5, 47]
[15, 36]
[5, 15, 36, 47]
[3, 5, 15, 36, 38, 44, 47]
[26, 27]
[2, 46]
[2, 26, 27, 46]
[4, 19]
[48, 50]
[4, 19, 48, 50]
[2, 4, 19, 26, 27, 46, 48, 50]
[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]


In [92]:
# Quick Sort

In [93]:
def QuickSort(L):
    """Return the items of ``L`` sorted ascending using quicksort.

    ``L`` itself is not modified. A sequence with fewer than two elements is
    returned as-is (the same object); otherwise a new list is built.

    The pivot is the middle element and the input is split three ways
    (smaller / not-smaller-nor-larger / larger). Compared with a
    first-element pivot and a two-way split this keeps the recursion shallow
    on already-sorted input and on input with many duplicates, both of which
    would otherwise recurse once per element and hit Python's recursion
    limit. Items that are neither smaller nor larger than the pivot are kept
    in the middle group, so no element is ever dropped.

    Args:
        L: Indexable sequence of mutually comparable items.

    Returns:
        A list with the items of ``L`` in ascending order (or ``L`` itself
        when ``len(L) <= 1``).
    """
    if len(L) <= 1:
        return L

    pivot = L[len(L) // 2]
    less, equal, greater = [], [], []
    for item in L:
        if item < pivot:
            less.append(item)
        elif item > pivot:
            greater.append(item)
        else:
            # Includes the pivot itself, so this group is never empty and
            # both recursive calls receive strictly shorter lists.
            equal.append(item)
    return QuickSort(less) + equal + QuickSort(greater)

In [94]:
L = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]

In [95]:
QuickSort(L)

[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]

In [96]:
# Heap Sort

In [97]:
import heapq
def HeapSort(L):
    """Return a new list with the items of ``L`` in ascending order.

    A copy of the input is turned into a min-heap in one step with
    ``heapq.heapify`` (linear time, versus pushing the items one by one), and
    the smallest item is then popped repeatedly until the heap is empty.
    ``L`` itself is not modified.

    Args:
        L: Iterable of mutually comparable items.

    Returns:
        A new list containing the items of ``L`` sorted ascending.
    """
    heap = list(L)  # work on a copy so the caller's data is left untouched
    heapq.heapify(heap)
    return [heapq.heappop(heap) for _ in range(len(heap))]

In [98]:
L = [3, 44, 38, 5, 47, 15, 36, 26, 27, 2, 46, 4, 19, 50, 48]

In [99]:
HeapSort(L)

[2, 3, 4, 5, 15, 19, 26, 27, 36, 38, 44, 46, 47, 48, 50]