In [59]:
import numpy as np
import pandas as pd

### Introduction to Series and DataFrame, creating them from lists, dicts, and NumPy arrays. Attributes: shape, index, columns.

A Series is a one-dimensional labeled array: similar to a NumPy array, but every value carries an index label.

In [60]:
# Build a Series from a plain Python list; with no index given,
# pandas labels the values with the default integer positions.
s = pd.Series(data=[10, 20, 30, 40])
s

0    10
1    20
2    30
3    40
dtype: int64

A DataFrame is a two-dimensional table (like an Excel sheet), made of labeled rows and columns.

In [62]:
# Column-oriented input: each dict key becomes a column name and
# each list supplies that column's values, row by row.
data = {
    "name": ["sam", "james", "vishnu", "ravi"],
    "age": [22, 33, 23, 12],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,age
0,sam,22
1,james,33
2,vishnu,23
3,ravi,12


Creating DataFrame/Series from different sources

In [63]:
# A Series is one-dimensional, so nested lists are not flattened:
# each inner list is stored as a single element (dtype becomes object).
s1 = pd.Series(data=[[1, 2, 3], [4, 5, 6]])
s1

0    [1, 2, 3]
1    [4, 5, 6]
dtype: object

In [66]:
# Row-oriented input: every inner list is one row of the table,
# and `columns` names the positions within each row.
df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=["A", "B", "C"],
)
df


Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


From NumPy array

In [69]:
# The integers 1..15 laid out as a 5-row by 3-column matrix.
arr = np.reshape(np.arange(1, 16), (5, 3))
arr

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [13, 14, 15]])

In [70]:
# Wrap the 2-D array in a DataFrame: array rows become table rows,
# and the three array columns are labeled a, b, c.
df = pd.DataFrame(data=arr, columns=["a", "b", "c"])
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9
3,10,11,12
4,13,14,15


From dict of Series

In [71]:
# Two Series sharing the same default index; in the dict passed to
# DataFrame, each key names a column and each Series fills it.
s1, s2 = pd.Series(data=[1, 2, 3]), pd.Series(data=[4, 5, 6])
df = pd.DataFrame(data={"col1": s1, "col2": s2})
df

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6


In [73]:
# shape is a tuple of (number of rows, number of columns).
df.shape

(3, 2)

In [74]:
# index holds the row labels; no labels were supplied when this frame
# was built, so it shows the default RangeIndex.
df.index

RangeIndex(start=0, stop=3, step=1)

In [None]:


# columns holds the column labels as an Index object.
df.columns

Index(['col1', 'col2'], dtype='object')

In [75]:
# City names paired with their temperatures, one dict key per column.
d1 = {
    "City": ["Kochi", "Delhi", "Mumbai"],
    "Temp": [30, 25, 29],
}
df = pd.DataFrame(data=d1)
df

Unnamed: 0,City,Temp
0,Kochi,30
1,Delhi,25
2,Mumbai,29


Create a Series from a Python list.

In [76]:
# Start from an ordinary Python list, then hand it to the Series constructor.
l = list(range(1, 6))
s = pd.Series(data=l)
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

Create a DataFrame from a dictionary.

In [77]:
# Dict keys become the column labels; the lists hold the per-row values.
data = {
    "name": ["sam", "james", "vishnu", "ravi"],
    "age": [22, 33, 23, 12],
}
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,name,age
0,sam,22
1,james,33
2,vishnu,23
3,ravi,12


Display DataFrame shape and column names.

In [78]:
# shape is a tuple of (number of rows, number of columns).
df.shape

(4, 2)

In [79]:
# columns holds the column labels as an Index object.
df.columns

Index(['name', 'age'], dtype='object')

Create a Series of numbers 1–10.

In [82]:
# range(1, 11) stops before 11, so the Series holds exactly 1 through 10.
s = pd.Series(data=range(1, 11))
s

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

Create a DataFrame with two columns: Name, Age.

In [83]:
# Two-column table: one column of names, one of ages,
# each supplied as a list under its dict key.
data = {
    "name": ["sam", "james", "vishnu", "ravi"],
    "age": [22, 33, 23, 12],
}
df = pd.DataFrame(data=data, columns=["name", "age"])
df

Unnamed: 0,name,age
0,sam,22
1,james,33
2,vishnu,23
3,ravi,12


Convert a NumPy array into a DataFrame.

In [84]:
# Build the 5x3 matrix of 1..15, then wrap it in a DataFrame whose
# columns are labeled a, b, c.
arr = np.reshape(np.arange(1, 16), (5, 3))
df = pd.DataFrame(data=arr, columns=["a", "b", "c"])
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9
3,10,11,12
4,13,14,15


Rename the columns of a DataFrame.

In [85]:
# Rename by explicit old -> new mapping rather than assigning a positional
# list to df.columns. The mapping does not depend on column order or count,
# so this cell can be re-run safely even after later cells add columns
# (positional assignment would then raise a length-mismatch ValueError).
df = df.rename(columns={"a": "A", "b": "B", "c": "C"})
df

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9
3,10,11,12
4,13,14,15


Add a new column to a DataFrame that holds the square of another column.

In [86]:
# Element-wise square of column C, stored alongside it as C_Square.
df["C_Square"] = df["C"].pow(2)
df

Unnamed: 0,A,B,C,C_Square
0,1,2,3,9
1,4,5,6,36
2,7,8,9,81
3,10,11,12,144
4,13,14,15,225
