#### Series is a one-dimensional labeled array capable of holding data of any type (integer, string, float, python objects, etc.). The axis labels are collectively called index.

In [1]:
import pandas as pd
import numpy as np

In [2]:
a = ['Alex','Jack','Jeff']

In [3]:
pd.Series(a)

0    Alex
1    Jack
2    Jeff
dtype: object

In [4]:
a = ['Alex','Jack',None]

In [5]:
pd.Series(a)

0    Alex
1    Jack
2    None
dtype: object

In [12]:
a = [1,2,None]

In [18]:
d = pd.Series(a)

In [17]:
d

0    1.0
1    2.0
2    NaN
dtype: float64

In [8]:
np.nan == np.nan

False

In [9]:
np.nan == None

False

In [16]:
np.isnan(np.nan)

True

In [15]:
e = d[np.isnan(d)]
e

2   NaN
dtype: float64

In [21]:
dic = {'Alex':'Alex@gmail.com','John':'John12@gmail.com','Jeff':'Jeff@hotmail.com'}

In [22]:
dt = pd.Series(dic)

In [23]:
dt

Alex      Alex@gmail.com
John    John12@gmail.com
Jeff    Jeff@hotmail.com
dtype: object

In [24]:
dt.index

Index(['Alex', 'John', 'Jeff'], dtype='object')

In [25]:
dt['Alex']

'Alex@gmail.com'

In [26]:
data = [('Alex','Nick'),('Jack','Adam'),('Andrew','Jeff')]

pd.Series(data)

0      (Alex, Nick)
1      (Jack, Adam)
2    (Andrew, Jeff)
dtype: object

In [27]:
pd.Series(['Physics','Chemistry','Biology'],index = ['Alex','Andrew','Nick'])

Alex        Physics
Andrew    Chemistry
Nick        Biology
dtype: object

In [28]:
data = {'Alex':'Alex@gmail.com','Andrew':'Andrew123@gmail.com','John':'John12@gmail.com'}

data = pd.Series(data,index = ['Alex','John','Sam'])

In [29]:
data

Alex      Alex@gmail.com
John    John12@gmail.com
Sam                  NaN
dtype: object

In [30]:
data.iloc[1]

'John12@gmail.com'

In [31]:
data.loc['John']

'John12@gmail.com'

In [32]:
data[1]

'John12@gmail.com'

In [33]:
data['John']

'John12@gmail.com'

In [34]:
student_data = {
    90:'Physics',
    80:'Chemistry',
    100:'Biology'
}
stud_dt = pd.Series(student_data)

In [35]:
stud_dt.iloc[0]

'Physics'

#### Add rows to a series

In [36]:
s = pd.Series([1,2,3])
s.loc['Add'] = 8
s

0      1
1      2
2      3
Add    8
dtype: int64

#### Append two series

In [37]:
data = {'Alex':'Alex@gmail.com','Andrew':'Andrew122@gmail.com','John':'John@hotmail.com'}
d = pd.Series(data)

In [38]:
d1 = pd.Series(['Abc@gmail.com','xcy@gmail.com','pop@gmail.com'], index = ['kelly','kelly','kelly'])
d1

kelly    Abc@gmail.com
kelly    xcy@gmail.com
kelly    pop@gmail.com
dtype: object

In [60]:
all_data = d.append(d1)

In [61]:
all_data

Alex           Alex@gmail.com
Andrew    Andrew122@gmail.com
John         John@hotmail.com
kelly           Abc@gmail.com
kelly           xcy@gmail.com
kelly           pop@gmail.com
dtype: object

In [63]:
all_data.loc['kelly']

kelly    Abc@gmail.com
kelly    xcy@gmail.com
kelly    pop@gmail.com
dtype: object

#### The Dataframe is conceptually a two- dimensional series object, where there's an index and multiple columns of content with each column having a label.

In [40]:
record1 = pd.Series({'Name': 'Alice',
                        'Class': 'Physics',
                        'Score': 85})
record2 = pd.Series({'Name': 'Jack',
                        'Class': 'Chemistry',
                        'Score': 82})
record3 = pd.Series({'Name': 'Helen',
                        'Class': 'Biology',
                        'Score': 90})

In [41]:
pd.DataFrame([record1,record2,record3], index = ['Student1','Student2','Student3'])

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student3,Helen,Biology,90


In [42]:
students = [{'Name': 'Alice',
              'Class': 'Physics',
              'Score': 85},
            {'Name': 'Jack',
             'Class': 'Chemistry',
             'Score': 82},
            {'Name': 'Helen',
             'Class': 'Biology',
             'Score': 90}]

df = pd.DataFrame(students,index= ['Student1','Student2','Student1'])

In [43]:
type(df.loc['Student2'])

pandas.core.series.Series

In [44]:
type(df.loc['Student1'])

pandas.core.frame.DataFrame

In [45]:
df.loc['Student1','Name']

Student1    Alice
Student1    Helen
Name: Name, dtype: object

In [46]:
df

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student1,Helen,Biology,90


In [47]:
df1 = df.T

In [48]:
df1.loc['Name']

Student1    Alice
Student2     Jack
Student1    Helen
Name: Name, dtype: object

In [49]:
df.loc['Student1']['Name']

Student1    Alice
Student1    Helen
Name: Name, dtype: object

In [50]:
df.loc[:,'Name']

Student1    Alice
Student2     Jack
Student1    Helen
Name: Name, dtype: object

In [51]:
df.loc[:,['Name','Score']]

Unnamed: 0,Name,Score
Student1,Alice,85
Student2,Jack,82
Student1,Helen,90


In [52]:
df.drop('Student1')

Unnamed: 0,Name,Class,Score
Student2,Jack,Chemistry,82


In [53]:
df

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student1,Helen,Biology,90


### Drop dataframe

In [55]:
copy_df = df.copy()
#Axis = 1 to specify columns
copy_df.drop("Name",inplace = True,axis = 1)

In [56]:
del copy_df['Class']
copy_df

Unnamed: 0,Score
Student1,85
Student2,82
Student1,90


#### Add new columns in data frame

In [58]:
df['Rank'] = None

In [59]:
df

Unnamed: 0,Name,Class,Score,Rank
Student1,Alice,Physics,85,
Student2,Jack,Chemistry,82,
Student1,Helen,Biology,90,


### Dataframe indexing and loading

In [60]:
df = pd.read_csv('datasets/Admission_Predict.csv')

# And let's look at the first few rows
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [61]:
df = pd.read_csv('datasets/Admission_Predict.csv', index_col=0)
df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


### Rename dataframe

In [118]:
new_df=new_df.rename(columns={'GRE Score':'GRE Score', 'TOEFL Score':'TOEFL Score',
                   'University Rating':'University Rating', 
                   'SOP': 'Statement of Purpose','LOR': 'Letter of Recommendation',
                   'CGPA':'CGPA', 'Research':'Research',
                   'Chance of Admit':'Chance of Admit'})
new_df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,Statement of Purpose,Letter of Recommendation,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


In [111]:
new_df = new_df.rename(columns = {'LOR ': 'Letter of Recommendation'})
new_df

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,Statement of Purpose,Letter of Recommendation,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67


In [None]:
new_df = new_df.rename(mapper = str.strip,axis = 1)
new_df.columns

In [122]:
cols = list(df.columns)
rename_cols = [x.lower().strip() for x in cols]
df.columns = rename_cols

In [123]:
df.columns

Index(['gre score', 'toefl score', 'university rating', 'sop', 'lor', 'cgpa',
       'research', 'chance of admit'],
      dtype='object')

### Querying a dataframe