In [1]:
import pandas as pd

In [2]:
series_1 = pd.Series([2,9,0,1])                # Creating a series object
print(series_1.values)                         # Print values of the series object

[2 9 0 1]


In [3]:
series_1.index                # Default index of the series object

RangeIndex(start=0, stop=4, step=1)

In [4]:
series_1.index = ['a','b','c','d']                 #Settnig index of the series object

In [5]:
series_1['d']                # Fetching elemnet using new index

1

In [6]:
# Creating dataframe using pandas
class_data = {'Names':['John','Ryan','Emily'],
             'Standard': [7,5,8],
             'Subject': ['English','Mathematics','Science']}

class_df = pd.DataFrame(class_data, index = ['Student1','Student2','Student3'],
                       columns = ['Names','Standard','Subject'])

In [7]:
print(class_df)

          Names  Standard      Subject
Student1   John         7      English
Student2   Ryan         5  Mathematics
Student3  Emily         8      Science


In [8]:
class_df.Names

Student1     John
Student2     Ryan
Student3    Emily
Name: Names, dtype: object

In [9]:
# Add new entry to the dataframe
import numpy as np
class_df.ix['Student4'] = ['Robin', np.nan, 'History']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  This is separate from the ipykernel package so we can avoid doing imports until


In [10]:
class_df.T                # Take transpose of the dataframe

Unnamed: 0,Student1,Student2,Student3,Student4
Names,John,Ryan,Emily,Robin
Standard,7,5,8,
Subject,English,Mathematics,Science,History


In [11]:
class_df.sort_values(by='Standard')   # Sorting of rows by one column

Unnamed: 0,Names,Standard,Subject
Student2,Ryan,5.0,Mathematics
Student1,John,7.0,English
Student3,Emily,8.0,Science
Student4,Robin,,History


In [12]:
# Adding one more column to the dataframe as Series object
col_entry = pd.Series(['A','B','A+','C'], 
                      index=['Student1','Student2','Student3','Student4' ] )
class_df['Grade'] = col_entry

In [13]:
print(class_df)

          Names  Standard      Subject Grade
Student1   John       7.0      English     A
Student2   Ryan       5.0  Mathematics     B
Student3  Emily       8.0      Science    A+
Student4  Robin       NaN      History     C


In [14]:
# Filling the missing entries in the dataframe, inplace
class_df.fillna(10, inplace=True)
print(class_df)

          Names  Standard      Subject Grade
Student1   John       7.0      English     A
Student2   Ryan       5.0  Mathematics     B
Student3  Emily       8.0      Science    A+
Student4  Robin      10.0      History     C


In [15]:
# Concatenation of 2 dataframes
student_age = pd.DataFrame(data = {'Age': [13,10,15,18]} ,
                           index=['Student1','Student2','Student3','Student4' ] )

In [16]:
print(student_age)

          Age
Student1   13
Student2   10
Student3   15
Student4   18


In [17]:
class_data = pd.concat([class_df, student_age ], axis = 1)
print(class_data)

          Names  Standard      Subject Grade  Age
Student1   John       7.0      English     A   13
Student2   Ryan       5.0  Mathematics     B   10
Student3  Emily       8.0      Science    A+   15
Student4  Robin      10.0      History     C   18


In [18]:
# MAP Function
class_data['Subject'] = class_data['Subject'].map(lambda x : x + 'Sub')
class_data['Subject']

Student1        EnglishSub
Student2    MathematicsSub
Student3        ScienceSub
Student4        HistorySub
Name: Subject, dtype: object

In [19]:
# APPLY Function
def age_add(x):                 # Defining a new function which will increment the age by 1
    return(x+1)

print('-----Old values-----')
print(class_data['Age'])
print('-----New values-----')
print(class_data['Age'].apply(age_add))          # Applying the age function on top of the age column

-----Old values-----
Student1    13
Student2    10
Student3    15
Student4    18
Name: Age, dtype: int64
-----New values-----
Student1    14
Student2    11
Student3    16
Student4    19
Name: Age, dtype: int64


In [20]:
# Changing datatype of the column
class_data['Grade'] = class_data['Grade'].astype('category')
class_data.Grade.dtypes

category

In [21]:
# Storing the results 
class_data.to_csv('class_dataset.csv', index=False)