# Built-In sequences
Python has 5 built-in sequence types:
* Strings : *Immutable*
* Tuples : *Immutable*
* Lists : *Mutable*
* Byte arrays : *Mutable*
* Bytes : *Immutable*

In [1]:
# Python's built-in sequence datatypes: one empty instance of each
l = []
s = ''
t = ()
ba = bytearray(b'')
b = bytes([])
# bytes is a type of its own only on Python 3.x; on Python 2.x it reports as str
for sequence in (l, s, t, ba, b):
    print(type(sequence))

<type 'list'>
<type 'str'>
<type 'tuple'>
<type 'bytearray'>
<type 'str'>


In [2]:
# Copying vs. aliasing a mutable collection
# (the elements of a set and the keys of a dictionary must themselves be immutable objects)
# NOTE: despite its name, myList is a set, and set.copy() makes a shallow copy, not a deep one
myList = {'people','tomey','dingdong'}
slayers = myList.copy()    # a new, independent set object holding the same elements
slayers1 = myList    # only a second name (reference) for the very same set object
print(myList)
print(slayers)
print(slayers1)
slayers.discard('people')    # changes only the copy
slayers.remove('tomey')    # changes only the copy
slayers1.discard('dingdong')    # goes through the alias, so myList changes as well
print(myList)
print(slayers)
print(slayers1)

# the copy module offers both kinds of copy explicitly
import copy
m = copy.copy(slayers)    # shallow copy
n = copy.deepcopy(slayers)    # deep copy: the contained objects are copied recursively as well


set(['people', 'dingdong', 'tomey'])
set(['tomey', 'dingdong', 'people'])
set(['people', 'dingdong', 'tomey'])
set(['people', 'tomey'])
set(['dingdong'])
set(['people', 'tomey'])


# Python Strings
Every python object has two output forms :
* String form - readable by humans
* Representational form - readable by the Python interpreter


In [3]:
# Unicode strings
# \u0020 is a four-hex-digit (16 bit) escape for the code point U+0020, a plain space;
# the same character fits into 8 bits as ASCII
greeting = u'Yo\u0020Bro !'
print(greeting)


Yo Bro !


In [4]:
# Methods for strings
# str.join glues the items of an iterable together, using the string it is called on
# as the separator; prefer it over repeated + when concatenating a list
list_of_names = ['Abhishek','Karunesh','Arpit']
for separator in (' ', '-<>-'):
    print(separator.join(list_of_names))
# reversed() is a built-in function that yields the items back to front;
# list_of_names itself is not modified
backwards = reversed(list_of_names)
print('__|__'.join(backwards))


Abhishek Karunesh Arpit
Abhishek-<>-Karunesh-<>-Arpit
Arpit__|__Karunesh__|__Abhishek


In [5]:
# rjust and ljust pad a string to a given width with a fill character
name = 'Abhishek Shakya'
# rjust right-justifies the text, so the fill goes in front of it;
# ljust left-justifies the text, so the fill goes behind it
for justify in (name.rjust, name.ljust):
    print(justify(50, '-'))


-----------------------------------Abhishek Shakya
Abhishek Shakya-----------------------------------


In [6]:
# format method
# str.format substitutes its arguments into the {} placeholders of the string.
# .format() must be called on the string inside print(...): calling it on the result
# of print(...) only works with the Python 2 print statement and fails on Python 3,
# where print() returns None.
print('{0}, {1}'.format('I am the one','No I am not!'))

# string mapping or unmapping
# the idea is to combine locals() with format: **locals() passes every name of the
# current scope as a keyword argument, so {number} and {string} pick up the variables below
number = 999
string = 'abhishek'
print('Number {number} is a good {string}'.format(**locals()))


I am the one, No I am not!
Number 999 is a good abhishek


In [7]:
# splitlines breaks a string apart at its line boundaries
random_string = 'abhishek shakya\nding dong\nhello hello\nsomebody in there'
lines = random_string.splitlines()
print(lines)

# split takes an explicit separator and, optionally, a maximum number of splits
first_two_breaks = random_string.split('\n',2)    # at most 2 splits; the rest stays in one piece
print(first_two_breaks)
by_space = random_string.split(' ')
print(by_space)
print(by_space)

['abhishek shakya', 'ding dong', 'hello hello', 'somebody in there']
['abhishek shakya', 'ding dong', 'hello hello\nsomebody in there']
['abhishek', 'shakya\nding', 'dong\nhello', 'hello\nsomebody', 'in', 'there']
['abhishek', 'shakya\nding', 'dong\nhello', 'hello\nsomebody', 'in', 'there']


In [8]:
# genfromtxt requires every row to hold the same number of words (separable entities)
import numpy as np
with open('random_text.txt','r') as txt:
    # dtype=str is accepted by NumPy on both Python 2 and Python 3,
    # whereas the name 'string' is not understood on Python 3
    text = np.genfromtxt(txt,dtype=str)    # this is good for reading data in columnar structure like logs
print(text)

[['Abhishek' 'shakya']
 ['pulkit' 'shakya']]


In [9]:
# using rsplit function
print(random_string.rsplit(' ',2))    # splits from the right, at most 2 times; the untouched remainder is the first item

['abhishek shakya\nding dong\nhello hello\nsomebody', 'in', 'there']


In [10]:
# the strip method with an explicit set of characters to remove
sample_string = '\nabhishek\nshakya\n'
print(sample_string)
sample_string.strip('\n')    # returns a copy without the leading and trailing '\n'; the newline inside the string is kept



abhishek
shakya



'abhishek\nshakya'

In [11]:
# read the text file into a list with one entry per line (line endings are kept)
with open('sample_text.txt','r') as text_object:
    text = list(text_object)
print(text)

['Problem Statement\n', 'Welcome to Gardenia - A country which believes in creating a harmony between technology and natural resources. Over the years, Gardenia has come up with ways to utilise natural resources effectively and they have enabled this with use of cutting edge technology.\n', '\xc2\xa0\n', 'The country takes pride in the way it has maintained its natural resources and Gardens. Now, the government of Gardenia wants to use data science to understand the health habits of their citizens. So, they started to capture several variables from various parks in the country.\n', '\xc2\xa0\n', 'They now have this data over a long period of time and are looking for experts like yourself to look at the data and tell, if you can predict how many people will come to a park on a particular day, given the environmental information.\n', '\xc2\xa0\n', 'Go, help the mayor of Gardenia! He has high hopes from Analytics Vidhya community! All the best!\n', '\n']


In [12]:
# building a word-frequency table with lower(), strip() and split()
import string
import sys

word_dict = {}
def count_unique_word(text):
    """Tally every space-separated token of text into the module-level word_dict.

    text is an iterable of lines. Each line is lower-cased, has newline
    characters stripped from both ends and is split on single spaces, so
    empty tokens and punctuation attached to words are counted as they come.
    Afterwards the keys of word_dict are printed, followed by every key
    together with its count, in ascending key order.
    """
    for raw_line in text:
        for token in raw_line.lower().strip('\n').split(' '):
            word_dict[token] = word_dict.get(token, 0) + 1
    print(list(word_dict))    # every key (word) collected so far
    # iterating over sorted(word_dict) walks the keys in ascending order
    for key in sorted(word_dict):
        print(key,'-->',word_dict[key])


if __name__=='__main__':
    count_unique_word(text)

['', 'all', 'help', 'over', 'its', 'information.', 'to', 'health', 'has', 'resources', 'resources.', 'government', 'gardens.', 'cutting', 'they', 'now', 'like', 'technology.', 'best!', 'country.', 'habits', 'people', 'analytics', 'are', 'for', 'ways', 'mayor', 'looking', 'day,', 'years,', 'between', 'various', 'creating', 'of', 'given', 'come', 'on', 'country', 'community!', 'variables', 'period', 'yourself', 'maintained', 'now,', 'use', 'from', 'takes', 'long', 'their', 'way', 'wants', 'statement', 'go,', 'started', 'park', 'so,', 'citizens.', 'environmental', 'understand', 'particular', 'hopes', 'with', 'he', 'pride', '\xc2\xa0', 'look', 'this', 'science', 'enabled', 'up', 'believes', 'will', 'can', 'many', 'problem', 'and', 'vidhya', 'predict', 'it', 'high', 'effectively', 'experts', 'at', 'have', 'in', 'technology', 'if', 'capture', 'gardenia!', '-', 'parks', 'how', 'harmony', 'which', 'you', 'several', 'tell,', 'utilise', 'gardenia', 'welcome', 'data', 'a', 'natural', 'edge', 'tim

In [13]:
import operator
# order the (word, count) pairs by their count, most frequent first
by_count = operator.itemgetter(1)
sorted_word = sorted(word_dict.items(), key=by_count, reverse=True)
for word, count in sorted_word:
    print('{}: {}'.format(word, count))

the: 10
to: 7
of: 5
and: 5
a: 5
has: 3
they: 3
 : 3
in: 3
gardenia: 3
data: 3
natural: 3
over: 2
resources: 2
come: 2
country: 2
use: 2
from: 2
with: 2
this: 2
have: 2
: 1
all: 1
help: 1
its: 1
information.: 1
health: 1
resources.: 1
government: 1
gardens.: 1
cutting: 1
now: 1
like: 1
technology.: 1
best!: 1
country.: 1
habits: 1
people: 1
analytics: 1
are: 1
for: 1
ways: 1
mayor: 1
looking: 1
day,: 1
years,: 1
between: 1
various: 1
creating: 1
given: 1
on: 1
community!: 1
variables: 1
period: 1
yourself: 1
maintained: 1
now,: 1
takes: 1
long: 1
their: 1
way: 1
wants: 1
statement: 1
go,: 1
started: 1
park: 1
so,: 1
citizens.: 1
environmental: 1
understand: 1
particular: 1
hopes: 1
he: 1
pride: 1
look: 1
science: 1
enabled: 1
up: 1
believes: 1
will: 1
can: 1
many: 1
problem: 1
vidhya: 1
predict: 1
it: 1
high: 1
effectively: 1
experts: 1
at: 1
technology: 1
if: 1
capture: 1
gardenia!: 1
-: 1
parks: 1
how: 1
harmony: 1
which: 1
you: 1
several: 1
tell,: 1
utilise: 1
welcome: 1
edge: 1
time

In [14]:
# earlier we found spaces and some punctuation marks interfering our counting process
# so we would use some advanced techniques
import sys
import string

def count_unique_word(text):
    """Print how often each word longer than two characters occurs in text.

    text is an iterable of lines. Every line is lower-cased and split on
    whitespace; each token then loses leading and trailing whitespace,
    punctuation, digits and quote characters. Tokens that are two
    characters or shorter after stripping are ignored. One
    '<word> occurs <n> times' line is printed per word, in sorted order.
    """
    tally = {}
    # every character that should be peeled off both ends of a token
    unwanted = string.whitespace+string.punctuation+string.digits+'\'"'
    for line in text:
        for token in line.lower().split():
            cleaned = token.strip(unwanted)
            if len(cleaned)<=2:
                continue
            tally[cleaned] = tally.get(cleaned,0)+1
    for word, occurrences in sorted(tally.items()):
        print('{0} occurs {1} times'.format(word,occurrences))
if __name__=='__main__':
    count_unique_word(text)

all occurs 1 times
analytics occurs 1 times
and occurs 5 times
are occurs 1 times
believes occurs 1 times
best occurs 1 times
between occurs 1 times
can occurs 1 times
capture occurs 1 times
citizens occurs 1 times
come occurs 2 times
community occurs 1 times
country occurs 3 times
creating occurs 1 times
cutting occurs 1 times
data occurs 3 times
day occurs 1 times
edge occurs 1 times
effectively occurs 1 times
enabled occurs 1 times
environmental occurs 1 times
experts occurs 1 times
for occurs 1 times
from occurs 2 times
gardenia occurs 4 times
gardens occurs 1 times
given occurs 1 times
government occurs 1 times
habits occurs 1 times
harmony occurs 1 times
has occurs 3 times
have occurs 2 times
health occurs 1 times
help occurs 1 times
high occurs 1 times
hopes occurs 1 times
how occurs 1 times
information occurs 1 times
its occurs 1 times
like occurs 1 times
long occurs 1 times
look occurs 1 times
looking occurs 1 times
maintained occurs 1 times
many occurs 1 times
mayor occurs 1 

In [15]:
# case-conversion methods, printed in this order with a divider line in between:
# swapcase (capital letters become small and vice versa), capitalize, lower, upper
divider = '----------------------'
variants = [
    random_string.swapcase(),
    random_string.capitalize(),
    random_string.lower(),
    random_string.upper(),
]
print(('\n' + divider + '\n').join(variants))

ABHISHEK SHAKYA
DING DONG
HELLO HELLO
SOMEBODY IN THERE
----------------------
Abhishek shakya
ding dong
hello hello
somebody in there
----------------------
abhishek shakya
ding dong
hello hello
somebody in there
----------------------
ABHISHEK SHAKYA
DING DONG
HELLO HELLO
SOMEBODY IN THERE


In [16]:
# find and index both locate a sub-string; rfind and rindex are their counterparts searching from the right
print(random_string.find('shakya'))    # returns the first position of sub-string
print(random_string.index('shakya'))    # returns the first position of sub-string

# the two differ only when the sub-string is not found
print(random_string.find('Yahoo'))    # returns -1 if sub-string not found
print(random_string.index('Yahoo'))    # raises ValueError, which is not handled here

9
9
-1


ValueError: substring not found

In [17]:
# the count method - counting occurrences of a sub-string
print(random_string.count('hello',0,-1))    # the optional start/end arguments limit the search to random_string[0:-1]


2


In [18]:
# replace method in python
random_string.replace('hello','yo')     # returns a new string; an optional third argument caps how many occurrences are replaced

'abhishek shakya\nding dong\nyo yo\nsomebody in there'

# Tuples
*Immutable* datatype in Python with values separated by commas <br>
**USP**: A string holds a character at every position, whereas a tuple holds a reference to an object at each position


In [19]:
just_a_tuple = 1,2,3,4,1    # the commas make the tuple, so it can be defined without brackets
print(type(just_a_tuple))

<type 'tuple'>


In [20]:
# methods for tuples
# count(x): how many times the value x occurs in the tuple
print(just_a_tuple.count(1))

# index(x): position of the first occurrence of the value x
print(just_a_tuple.index(1))

2
0


In [21]:
# named tuples - they live in the collections module of the standard library
# their fields are read by name (row.price), so rows can hold tabular data like csv records
import collections
BigTouple = collections.namedtuple('Food',['name', 'weight', 'price'])    # 'Food' is the type name, the list gives the field names
BigTouple_row = BigTouple('carrot','50','20')    # the values fill the fields in order
print(BigTouple_row.price)

20


In [22]:
# dict.get(key, default) returns the value stored under key, or default when the key is missing
words = {'abhishek':1,'pulkit':2}
print(words.get('donnno',-1))    # key missing, so the default -1 comes back
print(words.get('abhishek',-1))    # key present, so its value comes back

-1
1


# Lists
Lists are dynamically resizing arrays, which makes them different from linked lists. They are mutable (unlike strings).

In [67]:
# append adds its argument as ONE new item at the end of the list
a = [1,2,'abhishek','shirley',True]
b = ['dhinchak',False]
a.append(b)    # b itself becomes the last element of a
print(a)    # so appending a list to a list makes it nested

[1, 2, 'abhishek', 'shirley', True, ['dhinchak', False]]


In [68]:
c = [1,2,4]
d = ['abhishek','pulkit']
c.extend(d)    # extend takes any iterable and appends its items one by one (a string would be added character by character)
print(c)    # so extending adds the elements of one list to another list instead of nesting it

[1, 2, 4, 'abhishek', 'pulkit']


In [69]:
# pop and remove for removing things from list
c.pop()    # removes (and returns) the last item
c.remove(1)    # removes the first item whose value equals 1 - the argument is a value, not an index
print(c)

[2, 4, 'abhishek']


In [70]:
c+='mummy'    # += on a list extends it with the items of the right-hand iterable, so a string is added one character at a time
print(c)

[2, 4, 'abhishek', 'm', 'u', 'm', 'm', 'y']


In [71]:
# inserting an item into a list at a specified position
c.insert(2, 'papa')    # 'papa' lands at index 2; the items from that index onwards move one place to the right
print(c)

[2, 4, 'papa', 'abhishek', 'm', 'u', 'm', 'm', 'y']


In [72]:
c.pop(2)    # removes (and returns) the item at index 2
print(c)

[2, 4, 'abhishek', 'm', 'u', 'm', 'm', 'y']


## Garbage Collection
Garbage is memory occupied by objects that are no longer referenced; garbage collection is a form of automatic memory management that frees the memory occupied by garbage.

In [73]:
# del removes the item at the given index, so the list no longer references that object;
# an object that is no longer referenced anywhere can be garbage-collected
del c[0]
print(c)

[4, 'abhishek', 'm', 'u', 'm', 'm', 'y']


In [74]:
print(c.count('m'))    # number of items equal to 'm'
print(c.index('y'))    # index of the first item equal to 'y'

3
6


In [77]:
# c mixes an int with strings; Python 3 refuses to order int against str (TypeError),
# so the items are compared by their string form, which also works on Python 2
c.sort(key=str)    # ascending, in place
print(c)
c.sort(key=str, reverse=True)    # descending, in place
print(c)
c.reverse()    # flips the current order in place, without comparing any items
print(c)

[4, 'abhishek', 'm', 'm', 'm', 'u', 'y']
['y', 'u', 'm', 'm', 'm', 'abhishek', 4]
[4, 'abhishek', 'm', 'm', 'm', 'u', 'y']


In [88]:
# list unpacking
def list_unpacking(a,b,c):
    """Add the three arguments together, left to right, and return the result."""
    partial = a+b
    return partial+c
lt = [2,3,4]
print(list_unpacking(1,2,3))
# a starred argument spreads the slice lt[1:] over the remaining parameters b and c
tail = lt[1:]
print(list_unpacking(1,*tail))


6
8


In [106]:
# List comprehensions
# best kept to cases that fit on one line
# leap years: divisible by 4, except century years that are not divisible by 400
a = [year for year in range(1800,2004) if year%4==0 and (year%100!=0 or year%400==0)]
print(a)

[1804, 1808, 1812, 1816, 1820, 1824, 1828, 1832, 1836, 1840, 1844, 1848, 1852, 1856, 1860, 1864, 1868, 1872, 1876, 1880, 1884, 1888, 1892, 1896, 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940, 1944, 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984, 1988, 1992, 1996, 2000]


In [113]:
# Runtime analysis of Python lists
# Four ways of building a list of the numbers 0..999, timed against each other.

def test_1():
    # concatenation: l + [i] builds a brand-new list on every iteration
    l = []
    for i in range(1000):
        l=l+[i]

def test_2():
    # append: adds to the same list in place
    l = []
    for i in range(1000):
        l.append(i)

def test_3():
    # list comprehension
    l = [i for i in range(1000)]

def test_4():
    # list constructor applied directly to the range
    l = list(range(1000))
    
if __name__=='__main__':
    import timeit
    # Timer(stmt, setup): setup imports the function from __main__ so that stmt can call it.
    # timeit(number=1000) runs stmt 1000 times and returns the total elapsed time in seconds.
    # NOTE(review): the 'milliseconds' label only holds when read as the average per single call
    # (total seconds for 1000 calls equals milliseconds per call) - confirm that is the intent.
    t1 = timeit.Timer('test_1()','from __main__ import test_1')
    print('concat ',t1.timeit(number =1000),'milliseconds')
    t2 = timeit.Timer('test_2()','from __main__ import test_2')
    print('append ',t2.timeit(number=1000),'milliseconds')
    t3 = timeit.Timer('test_3()','from __main__ import test_3')
    print('comprehension ',t3.timeit(number=1000),'milliseconds')
    t4 = timeit.Timer('test_4()','from __main__ import test_4')
    print('list range ',t4.timeit(number=1000), 'milliseconds')

('concat ', 1.5946428775787354, 'milliseconds')
('append ', 0.08633708953857422, 'milliseconds')
('comprehension ', 0.03551793098449707, 'milliseconds')
('list range ', 0.011151790618896484, 'milliseconds')


## Properties of List
---
|Operation|Big-O Efficiency|
|:---:|---|
| index | O(1) |
|index assignment | O(1)|
|append | O(1)|
|pop() | O(1)|
|pop(i) | O(n)|
|insert(i,item) | O(n)|
|del operator |  O(n)|
| iteration | O(n)|
|contains (in) | O(n)|
|get slice [x:y] | O(k)|
|del slice | O(n)|
|set slice | O(n+k)|
|reverse | O(n)|
|concatenate | O(k)|
|sort | O(n log n)|
|multiply | O(nk)|

---

# Bytes and Byte array
> `bytes` is similar to a string (immutable) and `bytearray` is similar to a list (mutable).
> They are very useful for operations on numbers represented as bits.

In [137]:
# Reversing words in a string without reversing the whole sentence (string)

# Method 1
# NOTE: this helper reverses every character in the given range; the demo call
# at the bottom passes no range and therefore reverses the whole sentence.
def reversestring_simple(string1 , p1=0 , p2 = None):
    """Reverse the characters of string1 between positions p1 and p2 (inclusive).

    The characters are swapped in place, so string1 has to be a mutable
    sequence of characters such as list('abc').

    Args:
        string1: list of single-character strings; modified in place.
        p1: index of the first character of the range to reverse.
        p2: index of the last character of the range; None means the
            last index of string1.

    Returns:
        All of string1 (not only the reversed range) joined into one str.
    """
    # compare against None explicitly so that an explicit p2=0 is honoured
    if p2 is None:
        p2 = len(string1)-1
    # walk inwards from both ends; empty and single-item input never enter the loop
    while p1 < p2:
        string1[p1], string1[p2] = string1[p2], string1[p1]
        p1+=1
        p2-=1
    return ''.join(string1)

if __name__=='__main__':
    print(reversestring_simple(list('abhishek shakya')))

aykahs kehsihba


In [138]:
# Method 2: split the sentence into words, then join the words in reverse order
b = 'abhishek shakya'.split(' ')
" ".join(reversed(b))    # reversed() leaves b itself unchanged

'shakya abhishek'

In [140]:
# Method 3: split the sentence into words and reverse that list in place before joining
a = 'abhishek shakya'.split(' ')
a.reverse()    # in-place reversal; a now holds the words back to front
' '.join(a)

'shakya abhishek'

$$ The\ End $$