# <font color='blue'>1. Handling Missing Data </font>

In [None]:
import pandas as pd
import numpy as np

In [None]:
string_data = pd.Series(['aardvark', 'artichoke', np.nan, 'avocado'])

In [None]:
string_data

0     aardvark
1    artichoke
2          NaN
3      avocado
dtype: object

In [None]:
string_data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

When cleaning up the data for analysis, it is often important to do analysis on the missing data itself to identify data collection problems or potential biases in the data caused by missing data.

In [None]:
string_data[0] = None
string_data.isnull()

0     True
1    False
2     True
3    False
dtype: bool

<font color='blue'>**Filtering Out Missing Data**</font>

In [None]:
from numpy import nan as NA
data = pd.Series([1,NA,3.5,NA,7])
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

In [None]:
data[data.notnull()]

0    1.0
2    3.5
4    7.0
dtype: float64

With DataFrame objects, things are a bit more complex. You may want to drop rows
or columns that are all NA or only those containing any NAs. dropna by default drops any row containing a missing value:

In [None]:
data = pd.DataFrame([[1.,6.5,3.],[1.,NA,NA],
                     [NA,NA,NA],[NA,6.5,1.]])

In [None]:
cleanedata = data.dropna()
cleanedata

Unnamed: 0,0,1,2
0,1.0,6.5,3.0


In [None]:
# how='all' drops only the rows in which every value is NA.
# NOTE: dropna returns a new object and only the last expression of a cell is
# displayed, so the result of this call is computed and then discarded.
data.dropna(how='all')
# Add a column (label 4) that is entirely NA.
data[4] = NA
data

Unnamed: 0,0,1,2,4
0,1.0,6.5,3.0,
1,1.0,,,
2,,,,
3,,6.5,1.0,


In [None]:
data.dropna(axis=1,how='all')

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,1.0


A related way to filter out DataFrame rows tends to concern time series data. Suppose you want to keep only rows containing a certain number of observations. You can
indicate this with the `thresh` argument:

In [None]:
df = pd.DataFrame(np.random.rand(7,3))
df.iloc[:4,1] = NA
df.iloc[:2,2] = NA

In [None]:
df

Unnamed: 0,0,1,2
0,0.684541,,
1,0.643886,,
2,0.146676,,0.4199
3,0.748952,,0.943429
4,0.250528,0.005264,0.703436
5,0.094805,0.85299,0.742841
6,0.345429,0.984876,0.679315


In [None]:
df.dropna()

Unnamed: 0,0,1,2
4,0.250528,0.005264,0.703436
5,0.094805,0.85299,0.742841
6,0.345429,0.984876,0.679315


In [None]:
df.dropna(thresh=2)

Unnamed: 0,0,1,2
2,0.146676,,0.4199
3,0.748952,,0.943429
4,0.250528,0.005264,0.703436
5,0.094805,0.85299,0.742841
6,0.345429,0.984876,0.679315


<font color='blue'>**Filling Out Missing Data**</font>

Rather than filtering out missing data (and potentially discarding other data along with it), you may want to fill in the “holes” in any number of ways. For most purposes, the `fillna` method is the workhorse function to use. Calling fillna with a constant replaces missing values with that value:

In [None]:
df.fillna(0)

Unnamed: 0,0,1,2
0,0.684541,0.0,0.0
1,0.643886,0.0,0.0
2,0.146676,0.0,0.4199
3,0.748952,0.0,0.943429
4,0.250528,0.005264,0.703436
5,0.094805,0.85299,0.742841
6,0.345429,0.984876,0.679315


Calling `fillna` with a dict, you can use a different fill value for each column:

In [None]:
df.fillna({1:0.5,2:0})

Unnamed: 0,0,1,2
0,0.684541,0.5,0.0
1,0.643886,0.5,0.0
2,0.146676,0.5,0.4199
3,0.748952,0.5,0.943429
4,0.250528,0.005264,0.703436
5,0.094805,0.85299,0.742841
6,0.345429,0.984876,0.679315


`fillna` returns a new object, but you can modify the existing object in-place:

In [None]:
_ = df.fillna(0,inplace=True)
df

Unnamed: 0,0,1,2
0,0.684541,0.0,0.0
1,0.643886,0.0,0.0
2,0.146676,0.0,0.4199
3,0.748952,0.0,0.943429
4,0.250528,0.005264,0.703436
5,0.094805,0.85299,0.742841
6,0.345429,0.984876,0.679315


In [None]:
df = pd.DataFrame(np.random.randn(6,3))
df.iloc[2:,1]= NA
df.iloc[4:,2] = NA
df

Unnamed: 0,0,1,2
0,0.87713,-0.585494,-1.355787
1,-0.784295,0.407624,-0.351151
2,-1.124413,,1.122358
3,0.723038,,-1.95975
4,-0.573411,,
5,1.007921,,


In [None]:
# Forward-fill: propagate the last valid observation down each column.
# DataFrame.ffill() is the supported spelling; fillna(method='ffill') is deprecated.
df.ffill()

Unnamed: 0,0,1,2
0,0.87713,-0.585494,-1.355787
1,-0.784295,0.407624,-0.351151
2,-1.124413,0.407624,1.122358
3,0.723038,0.407624,-1.95975
4,-0.573411,0.407624,-1.95975
5,1.007921,0.407624,-1.95975


In [None]:
# Forward-fill, but fill at most 2 consecutive missing values per gap.
# DataFrame.ffill(limit=...) replaces the deprecated fillna(method='ffill', limit=...).
df.ffill(limit=2)

Unnamed: 0,0,1,2
0,0.87713,-0.585494,-1.355787
1,-0.784295,0.407624,-0.351151
2,-1.124413,0.407624,1.122358
3,0.723038,0.407624,-1.95975
4,-0.573411,,-1.95975
5,1.007921,,-1.95975


With fillna you can do lots of other things. For example, you might pass the mean or median value of the series. 

In [None]:
data = pd.Series([1.,NA,3.5,NA,7])
data.fillna(data.mean())

0    1.000000
1    3.833333
2    3.500000
3    3.833333
4    7.000000
dtype: float64

# <font color='blue'>2. Data Transformation </font>

In [None]:
df  = pd.DataFrame({'k1': ['One','Two']*3 + ['Two'],
                    'k2': [1,1,2,3,3,4,4]})

df

Unnamed: 0,k1,k2
0,One,1
1,Two,1
2,One,2
3,Two,3
4,One,3
5,Two,4
6,Two,4


The DataFrame method `duplicated` returns a boolean Series indicating whether each row is a duplicate (has been observed in a previous row) or not:

In [None]:
df.duplicated()

0    False
1    False
2    False
3    False
4    False
5    False
6     True
dtype: bool

In [None]:
df.drop_duplicates()

Unnamed: 0,k1,k2
0,One,1
1,Two,1
2,One,2
3,Two,3
4,One,3
5,Two,4


Both of these methods by default consider all of the columns; alternatively, you can specify any subset of them to detect duplicates. Suppose we had an additional column of values and wanted to filter duplicates only based on the 'k1' column

In [None]:
df['v1'] = range(7)
df

Unnamed: 0,k1,k2,v1
0,One,1,0
1,Two,1,1
2,One,2,2
3,Two,3,3
4,One,3,4
5,Two,4,5
6,Two,4,6


In [None]:
df.drop_duplicates('k1')

Unnamed: 0,k1,k2,v1
0,One,1,0
1,Two,1,1


`duplicated` and `drop_duplicates` by default keep the first observed value combination. Passing `keep='last'` will return the last one:

In [None]:
df.drop_duplicates(['k1','k2'])

Unnamed: 0,k1,k2,v1
0,One,1,0
1,Two,1,1
2,One,2,2
3,Two,3,3
4,One,3,4
5,Two,4,5


In [None]:
df.drop_duplicates(['k1','k2'],keep='last')

Unnamed: 0,k1,k2,v1
0,One,1,0
1,Two,1,1
2,One,2,2
3,Two,3,3
4,One,3,4
6,Two,4,6


<font color='blue'> Transforming Data Using a Function or Mapping</font>

For many datasets, you may wish to perform some transformation based on the values in an array, Series, or column in a DataFrame. Consider the following hypothetical data collected about various kinds of meat:

In [None]:
data = pd.DataFrame({'Food': ['bacon','pulled pork','bacon',
                              'Pastrami','corned beef','Bacon','pastrami'
                              ,'honey ham','nova lox'],
                     'onces' : [4,3,12,6,7.5,8,3,5,6]})
data

Unnamed: 0,Food,onces
0,bacon,4.0
1,pulled pork,3.0
2,bacon,12.0
3,Pastrami,6.0
4,corned beef,7.5
5,Bacon,8.0
6,pastrami,3.0
7,honey ham,5.0
8,nova lox,6.0


Suppose you wanted to add a column indicating the type of animal that each food came from.

In [None]:
meet_to_animal = {
    'bacon' : 'pig',
    'pulled pork': 'pig',
    'pastrami' :'cow',
    'corned beef' :'cow',
    'honey ham' : 'pig',
    'nova lox' : 'salmon'
}

The `map` method on a Series accepts a function or dict-like object containing a mapping, but here we have a small problem in that some of the meats are capitalized and others are not. Thus, we need to convert each value to lowercase using the `str.lower` Series method:

In [None]:
lowercased = data['Food'].str.lower()
lowercased

0          bacon
1    pulled pork
2          bacon
3       pastrami
4    corned beef
5          bacon
6       pastrami
7      honey ham
8       nova lox
Name: Food, dtype: object

In [None]:
data['animal'] = lowercased.map(meet_to_animal)
data

Unnamed: 0,Food,onces,animal
0,bacon,4.0,pig
1,pulled pork,3.0,pig
2,bacon,12.0,pig
3,Pastrami,6.0,cow
4,corned beef,7.5,cow
5,Bacon,8.0,pig
6,pastrami,3.0,cow
7,honey ham,5.0,pig
8,nova lox,6.0,salmon


Using `map` is a convenient  way to perform element-wise transformations and other data-cleaning related operations.

<font color='blue'>**Replacing Values**</font>

Filling in missing data with the `fillna` method is a special case of more general value replacement. As you’ve already seen,`map` can be used to modify a subset of values in an object but `replace` provides a simpler and more flexible way to do so. Let’s consider
this Series

In [None]:
data = pd.Series([1.,-999.,2.,-999,-1000.,3.])

data

0       1.0
1    -999.0
2       2.0
3    -999.0
4   -1000.0
5       3.0
dtype: float64

The $-999$ values might be sentinel values for missing data. To replace these with `NA` values that pandas understands, we can use replace, producing a new Series (unless you pass inplace=True):

In [None]:
data.replace(-999,np.nan)

0       1.0
1       NaN
2       2.0
3       NaN
4   -1000.0
5       3.0
dtype: float64

To use a different replacement for each value, pass a list of substitutes:

In [None]:
data.replace([-999,-1000],[np.nan,0])

0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

In [None]:
#The arguments passed can be a dict
data.replace({-999:np.nan,-1000:0})

0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

<font color='blue'>**Renaming Axis Indexes**</font>

Like values in a Series, axis labels can be similarly transformed by a function or mapping of some form to produce new, differently labeled objects. You can also modify the axes in-place without creating a new data structure.

In [None]:
data = pd.DataFrame(np.arange(12).reshape((3, 4)),
index=['Ohio', 'Colorado', 'New York'],
columns=['one', 'two', 'three', 'four'])

In [None]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
New York,8,9,10,11


In [None]:
transform  = lambda x: x[:4].upper()

In [None]:
data.index = data.index.map(transform)

In [None]:
data

Unnamed: 0,one,two,three,four
OHIO,0,1,2,3
COLO,4,5,6,7
NEW,8,9,10,11


In [None]:
data.rename(index = str.title,columns= str.upper)

Unnamed: 0,ONE,TWO,THREE,FOUR
Ohio,0,1,2,3
Colo,4,5,6,7
New,8,9,10,11


Notably, rename can be used in conjunction with a dict-like object providing new values for a subset of the axis labels

In [None]:
data.rename(index = {'OHIO':'Banana'},
            columns = {'four': 'Nutmeg'})

Unnamed: 0,one,two,three,Nutmeg
Banana,0,1,2,3
COLO,4,5,6,7
NEW,8,9,10,11


In [None]:
data.rename(index = {'OHIO':'INDIANA'},inplace=True)


In [None]:
data

Unnamed: 0,one,two,three,four
INDIANA,0,1,2,3
COLO,4,5,6,7
NEW,8,9,10,11


<font color='blue'>**Discretization and Binning**</font>

Continuous data is often discretized or otherwise separated into “bins” for analysis.
Suppose you have data about a group of people in a study, and you want to group
them into discrete age buckets:

In [None]:
ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]

Let’s divide these into bins of 18 to 25, 26 to 35, 36 to 60, and finally 61 and older. To
do so, you have to use cut, a function in pandas:

In [None]:
bins = [18,25,35,60,100]
cat = pd.cut(ages,bins)

In [None]:
cat

[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]
Length: 12
Categories (4, interval[int64, right]): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]

- The object  pandas returns  is a special `Categorical` object. The output  you see describes the bins computed by pandas.cut.
- You can treat it like an array of strings indicating the bin names; internally it contains a categories array specifying the distinct category names along with the labeling of the ages data  in the codes attribute.

In [None]:
cat.codes

array([0, 0, 0, 1, 0, 0, 2, 1, 3, 2, 2, 1], dtype=int8)

In [None]:
cat.categories

IntervalIndex([(18, 25], (25, 35], (35, 60], (60, 100]], dtype='interval[int64, right]')

In [None]:
# Count how many ages fall into each bin, most frequent bin first.
# The top-level pd.value_counts function is deprecated; wrap the Categorical
# in a Series and use the value_counts method instead.
pd.Series(cat).value_counts()

(18, 25]     5
(25, 35]     3
(35, 60]     3
(60, 100]    1
dtype: int64

In [None]:
pd.cut(ages,[18,26,36,61,100],right=False)

[[18, 26), [18, 26), [18, 26), [26, 36), [18, 26), ..., [26, 36), [61, 100), [36, 61), [36, 61), [26, 36)]
Length: 12
Categories (4, interval[int64, left]): [[18, 26) < [26, 36) < [36, 61) < [61, 100)]

You can also pass your own bin names by passing a list or array to the labels option

In [None]:
group_names = ['Youth','YoungAdult','MiddleAges','Senior']
pd.cut(ages,bins,labels= group_names)

['Youth', 'Youth', 'Youth', 'YoungAdult', 'Youth', ..., 'YoungAdult', 'Senior', 'MiddleAges', 'MiddleAges', 'YoungAdult']
Length: 12
Categories (4, object): ['Youth' < 'YoungAdult' < 'MiddleAges' < 'Senior']

If you pass an integer number of bins instead of explicit bin edges, it will compute equal-length bins based on the minimum and maximum values in the data.
Consider the case of some uniformly distributed data chopped into fourths:


In [None]:
data = np.random.rand(20)

cat =pd.cut(data,4,precision=2)

In [None]:
cat.codes

array([3, 2, 2, 0, 1, 1, 2, 3, 0, 1, 2, 0, 3, 3, 0, 0, 1, 0, 1, 3],
      dtype=int8)

In [None]:
cat.categories
# The precision =2 option limits the decimal precision to two digits.

IntervalIndex([(0.15, 0.35], (0.35, 0.55], (0.55, 0.75], (0.75, 0.95]], dtype='interval[float64, right]')

A closely related function, `qcut`, bins the data based on sample quantiles. Depending on the distribution of the data, using `cut` will not usually result in each bin having the same number of data points.
Since `qcut` uses sample quantiles instead, by definition you will obtain roughly equal-size bins:

In [None]:
data = np.random.randn(1000)  # 1000 draws from the standard normal distribution
cat = pd.qcut(data,4) # bin at the sample quartiles (4 quantile-based bins)
cat.categories

IntervalIndex([(-3.221, -0.597], (-0.597, 0.026], (0.026, 0.709], (0.709, 2.362]], dtype='interval[float64, right]')

<font color='blue'>**Detecting and Filtering Outliers**</font>

Filtering or transforming outliers is largely a matter of applying array operations

In [None]:
data = pd.DataFrame(np.random.randn(1000,4))
data.describe()

Unnamed: 0,0,1,2,3
count,1000.0,1000.0,1000.0,1000.0
mean,-0.016325,0.04447,-0.025544,0.004567
std,0.983054,1.027443,0.992285,1.056915
min,-2.527104,-3.442131,-4.103695,-3.258188
25%,-0.689838,-0.632355,-0.713466,-0.677504
50%,-0.006495,-0.010455,-0.022935,-0.023575
75%,0.644797,0.687112,0.622783,0.67132
max,2.912163,3.516287,3.345128,3.713733


Suppose you wanted to find values in one of the columns  exceeding 3 in absolute value 

In [None]:
col = data[2]
col[np.abs(col)> 3]

187    3.345128
685    3.174255
714   -3.444494
851   -4.103695
Name: 2, dtype: float64

To select all rows having at least one value exceeding 3 in absolute value, you can use the `any` method on a boolean DataFrame:


In [None]:
# Keep the rows where at least one column exceeds 3 in absolute value.
# axis must be passed by keyword: the positional form any(1) was deprecated
# and is rejected by pandas 2.0+.
data[(np.abs(data) > 3).any(axis=1)]

Unnamed: 0,0,1,2,3
10,0.377134,-0.095013,-1.541566,3.371433
24,0.068414,-1.635192,-1.159098,-3.184144
74,-0.437551,-0.468072,-0.233173,-3.258188
174,0.512192,3.516287,0.195234,-2.27626
187,-1.384652,1.079045,3.345128,-1.628471
400,-1.098187,0.361733,-1.044067,-3.047781
484,-0.495225,-3.442131,-0.557973,-2.014235
506,-0.340975,-0.562622,-1.168068,3.374336
570,1.873286,2.110443,-0.647864,3.713733
571,0.429287,-0.159023,1.067707,3.188514


In [None]:
#The statement np.sign(data) produces 1 and –1 values based on whether the values
#in data are positive or negative:
np.sign(data).head()

Unnamed: 0,0,1,2,3
0,-1.0,1.0,-1.0,-1.0
1,-1.0,1.0,1.0,1.0
2,-1.0,1.0,-1.0,-1.0
3,-1.0,1.0,1.0,1.0
4,-1.0,1.0,-1.0,1.0


<font color='blue'>**Permutation and Random Sampling**</font>

Permuting (randomly reordering) a Series or the rows in a DataFrame is easy to do using the `numpy.random.permutation` function. Calling permutation with the length of the axis you want to permute produces an array of integers indicating the new ordering:

In [None]:
df = pd.DataFrame(np.arange(5*4).reshape(5,4))

sampler = np.random.permutation(5)
print(sampler)

[1 4 3 2 0]


In [None]:
df

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
4,16,17,18,19


In [None]:
df.take(sampler)

Unnamed: 0,0,1,2,3
1,4,5,6,7
4,16,17,18,19
3,12,13,14,15
2,8,9,10,11
0,0,1,2,3


To select a random subset without replacement, you can use the `sample` method on Series and DataFrame

In [None]:
df.sample(n=len(df))

Unnamed: 0,0,1,2,3
2,8,9,10,11
0,0,1,2,3
4,16,17,18,19
3,12,13,14,15
1,4,5,6,7


To generate a sample with replacement (to allow repeat choices), pass `replace=True` to `sample`:


In [None]:
choices = pd.Series([5,7,-1,6,4])
draws = choices.sample(n=10,replace=True)
draws

2   -1
1    7
4    4
4    4
4    4
0    5
1    7
4    4
3    6
1    7
dtype: int64

<font color='blue'>**Computing Indicator/Dummy Variables**</font>

Another type of transformation for statistical modeling or machine learning applications is converting a categorical variable into a "dummy" or "indicator" matrix.
If a column in a DataFrame has $k$ distinct values, you would derive a matrix or DataFrame with $k$ columns  containing all 1s and 0s.

In [None]:
df = pd.DataFrame({'Key':['b','b','a','c','a','b'],
                   'data' : range(6)})
df

Unnamed: 0,Key,data
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


In [None]:
pd.get_dummies(df['Key'])

Unnamed: 0,a,b,c
0,0,1,0
1,0,1,0
2,1,0,0
3,0,0,1
4,1,0,0
5,0,1,0


In some cases, you may want to add a prefix to the columns in the indicator Data‐Frame, which can then be merged with the other data. get_dummies has a prefix argument
for doing this:

In [None]:
dummies = pd.get_dummies(df['Key'],prefix='key')

In [None]:
df_with_dummies = df[['data']].join(dummies)

In [None]:
df_with_dummies

Unnamed: 0,data,key_a,key_b,key_c
0,0,0,1,0
1,1,0,1,0
2,2,1,0,0
3,3,0,0,1
4,4,1,0,0
5,5,0,1,0


In [None]:
import random
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import seaborn as sns
import random
import matplotlib.pyplot as plt

%cd /content/drive/My\ Drive/colab_notebooks/machine_learning/Interview_prep/Python/

/content/drive/My Drive/colab_notebooks/machine_learning/Interview_prep/Python


In [None]:
mnames = ['movie_id', 'title', 'genres']

In [None]:
# The '::' delimiter is longer than one character, which only the Python parsing
# engine supports. Selecting it explicitly avoids the ParserWarning pandas emits
# when it silently falls back from the default C engine.
movies = pd.read_table('movies.dat', sep='::', header=None, names=mnames,
                       encoding='latin-1', engine='python')

  return func(*args, **kwargs)


In [None]:
movies[:10]

Unnamed: 0,movie_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


- Adding indicator variables for each genre requires a little bit of data wrangling.
- First we extract the list of unique genres from the df.

In [None]:
all_genres = []
for x in movies.genres:
  all_genres.extend(x.split('|'))

In [None]:
# Distinct genre names in order of first appearance.
# pd.unique is deprecated for plain Python lists, so wrap the list in a Series;
# the result is still a NumPy object array.
genres = pd.unique(pd.Series(all_genres))

In [None]:
genres

array(['Animation', "Children's", 'Comedy', 'Adventure', 'Fantasy',
       'Romance', 'Drama', 'Action', 'Crime', 'Thriller', 'Horror',
       'Sci-Fi', 'Documentary', 'War', 'Musical', 'Mystery', 'Film-Noir',
       'Western'], dtype=object)

In [None]:
#One way to construct the indicator DataFrame is to start with a DataFrame of all
#zeros:

zero_matrix = np.zeros((len(movies),len(genres)))
dummies = pd.DataFrame(zero_matrix,columns=genres)

In [None]:
# Now iterate through each movie and set entries in each row of dummies to 1.

In [None]:
gen = movies.genres[0]
print(gen)

Animation|Children's|Comedy


In [None]:
gen.split('|')

['Animation', "Children's", 'Comedy']

In [None]:
dummies.columns.get_indexer(gen.split('|'))

array([0, 1, 2])

In [None]:
# For each movie (row i), mark its genres in the indicator frame.
for i,gen in enumerate(movies.genres):
  # Translate this movie's '|'-separated genre names into column positions of dummies.
  indices = dummies.columns.get_indexer(gen.split('|'))
  # Positional assignment: set those columns to 1 in row i.
  dummies.iloc[i,indices] = 1 

In [None]:
# As before you can combine this with movies
movies_windic = movies.join(dummies.add_prefix('Genre_'))

In [None]:
movies_windic.iloc[0]

movie_id                                       1
title                           Toy Story (1995)
genres               Animation|Children's|Comedy
Genre_Animation                              1.0
Genre_Children's                             1.0
Genre_Comedy                                 1.0
Genre_Adventure                              0.0
Genre_Fantasy                                0.0
Genre_Romance                                0.0
Genre_Drama                                  0.0
Genre_Action                                 0.0
Genre_Crime                                  0.0
Genre_Thriller                               0.0
Genre_Horror                                 0.0
Genre_Sci-Fi                                 0.0
Genre_Documentary                            0.0
Genre_War                                    0.0
Genre_Musical                                0.0
Genre_Mystery                                0.0
Genre_Film-Noir                              0.0
Genre_Western       

A useful recipe for statistical applications is to combine `get_dummies` with a discretization function such as `cut`.

In [None]:
np.random.seed(12345)
values = np.random.rand(10)
values

array([0.92961609, 0.31637555, 0.18391881, 0.20456028, 0.56772503,
       0.5955447 , 0.96451452, 0.6531771 , 0.74890664, 0.65356987])

In [None]:
bins = [0,0.2,0.4,0.6,0.8,1]
pd.get_dummies(pd.cut(values,bins))

Unnamed: 0,"(0.0, 0.2]","(0.2, 0.4]","(0.4, 0.6]","(0.6, 0.8]","(0.8, 1.0]"
0,0,0,0,0,1
1,0,1,0,0,0
2,1,0,0,0,0
3,0,1,0,0,0
4,0,0,1,0,0
5,0,0,1,0,0
6,0,0,0,0,1
7,0,0,0,1,0
8,0,0,0,1,0
9,0,0,0,1,0


# <font color='blue'>3. String Manipulation </font>

Skipping this section for now 