Pandas
- Fast, flexible, and designed to work with tabular data.
- Designed to make working with structured data easier.

--- Robust tools for loading data from files such as CSV, Excel, etc.

--- Easy handling of missing values in the data (represented as NaN)

--- Size Mutability: Columns can be inserted and deleted

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Show the version string of the pandas installation used by this notebook.
pd.__version__

'2.1.4'

In [None]:
# Pandas DataFrame - a two-dimensional structure in which data is aligned in tabular form (rows and columns).

In [None]:
# A first Series, built from a NumPy array of letters.
# No index is passed, so pandas labels the entries 0, 1, 2, 3.
data = np.array(list("abcd"))
s1 = pd.Series(data=data)
print(s1)

0    a
1    b
2    c
3    d
dtype: object


In [None]:
# A Series with explicit string labels instead of the default integer index.
num_series = pd.Series(
    data=[1, 2, 3, 4],
    index=list("abcd"),
)
print(num_series)

a    1
b    2
c    3
d    4
dtype: int64


In [None]:
# First pandas DataFrame: a 3x4 grid of random integers drawn from [5, 10).
# A locally seeded Generator (the seed value itself is arbitrary) keeps the
# table identical on every Restart & Run All, without touching NumPy's
# global random state.
rng = np.random.default_rng(seed=0)
two_d = rng.integers(5, 10, size=(3, 4))
first_df = pd.DataFrame(two_d, columns=["a", "b", "c", "d"])
print(first_df)

   a  b  c  d
0  6  9  6  5
1  5  5  6  8
2  5  7  6  5


In [None]:
# Same idea as before, but with custom row labels: a 3x4 grid of random
# integers drawn from [10, 35), indexed by "A", "B", "C".
# The generator is seeded locally (arbitrary fixed seed) so the printed
# table is reproducible across kernel restarts.
rng = np.random.default_rng(seed=1)
two_d = rng.integers(10, 35, size=(3, 4))
hello_df = pd.DataFrame(
    two_d,
    columns=["a", "b", "c", "d"],
    index=["A", "B", "C"],
)
print(hello_df)

    a   b   c   d
A  13  14  33  15
B  34  26  32  23
C  28  12  19  29


In [2]:
# A named Series becomes a one-column DataFrame: the Series name ("Number")
# is used as the column label and the Series index as the row labels.
num_series = pd.Series(
    data=[1, 2, 3, 4, 5, 6],
    index=list("abcdef"),
    name="Number",
)
df = num_series.to_frame()
print(df)

   Number
a       1
b       2
c       3
d       4
e       5
f       6


In [3]:
# A 3x4 grid of random integers from [10, 35), with one row per subject.
# The generator is seeded locally (arbitrary fixed seed) so the table is the
# same on every run and NumPy's global random state is left alone.
rng = np.random.default_rng(seed=2)
dilip_fav = rng.integers(10, 35, size=(3, 4))
dilip_df = pd.DataFrame(
    dilip_fav,
    columns=["a", "b", "c", "d"],
    index=["English", "Telugu", "Social"],
)
print(dilip_df)

          a   b   c   d
English  23  19  24  17
Telugu   22  14  13  15
Social   18  29  25  13


In [None]:
# prompt: create a dataframe with 2 columns and 10 rows

# numpy (np) and pandas (pd) are already imported in the notebook's first
# code cell, so they are not imported again here.
# Uniform random floats in [0, 1); the locally seeded generator (arbitrary
# fixed seed) makes the table reproducible across kernel restarts.
rng = np.random.default_rng(seed=3)
data = rng.random((10, 2))
df = pd.DataFrame(data, columns=['Column1', 'Column2'])
print(df)


    Column1   Column2
0  0.630017  0.381914
1  0.681989  0.153395
2  0.734926  0.897958
3  0.220032  0.300218
4  0.965603  0.784941
5  0.476778  0.287787
6  0.869150  0.906831
7  0.569745  0.547062
8  0.076127  0.822950
9  0.616582  0.499658


In [None]:
# DataFrame from a list of rows: each inner list is one record, and the
# column names and the 1-based row labels are supplied separately.
list_of_lists = [
    ["Dilip", 24],
    ["Vasu", 2],
    ["Neimisha", 27],
]
df = pd.DataFrame(data=list_of_lists, index=[1, 2, 3], columns=["Name", "Age"])
print(df)

       Name  Age
1     Dilip   24
2      Vasu    2
3  Neimisha   27


In [None]:
# DataFrame from a dict of equal-length lists: each key becomes a column
# and the rows get the default 0..n-1 integer index.
employee_dict = {
    "Employee Name": ["Dilip", "Vasu", "Neimisha", "Kishore"],
    "Age": [24, 2, 27, 31],
    "Income $": [65000, 55000, 60000, 85000],
}
df = pd.DataFrame(data=employee_dict)
print(df)

  Employee Name  Age  Income $
0         Dilip   24     65000
1          Vasu    2     55000
2      Neimisha   27     60000
3       Kishore   31     85000


In [None]:
# DataFrame from a dict of Series: each Series becomes a column, and rows are
# matched up on the Series' index labels (both use the default 0..3 here).
series_dict = {
    "First Series": pd.Series(data=[1, 2, 3, 4]),
    "Second Series": pd.Series(data=[5, 6, 7, 8]),
}
df = pd.DataFrame(data=series_dict)
print(df)

   First Series  Second Series
0             1              5
1             2              6
2             3              7
3             4              8


In [None]:
# DataFrame from a list of dicts: the union of the keys forms the columns.
# The first record has no "c" key, so that cell is filled with NaN.
list_of_dict = [
    {"a": 1, "b": 2},
    {"a": 5, "b": 10, "c": 20},
]
df = pd.DataFrame(data=list_of_dict, index=[1, 2])
print(df)

   a   b     c
1  1   2   NaN
2  5  10  20.0


In [None]:
# Pair the two lists element-wise; each (list_a, list_b) tuple is one row.
list_a = ["Guntur Kaaram", "Khaleja", "Athadu"]
list_b = ["1", "Businessman", "Pokiri"]
list_of_tuples = [(left, right) for left, right in zip(list_a, list_b)]
df = pd.DataFrame(
    data=list_of_tuples,
    index=[1, 2, 3],
    columns=["MBx3V", "MBxPuri"],
)
print(df)

           MBx3V      MBxPuri
1  Guntur Kaaram            1
2        Khaleja  Businessman
3         Athadu       Pokiri


In [14]:
# Create a DataFrame with 100 rows and 5 different columns.
# Values are standard-normal draws from a locally seeded generator (arbitrary
# fixed seed), so the inspection cells that follow (head, tail, info, ...)
# show the same numbers on every Restart & Run All.
rng = np.random.default_rng(seed=4)
our_array = rng.standard_normal((100, 5))
df = pd.DataFrame(our_array, columns=["a", "b", "c", "d", "e"])
print(df)

           a         b         c         d         e
0   1.135128 -0.389427  0.069412  1.025060  1.095139
1  -0.353668  1.751153 -0.281406 -1.042236 -0.713194
2   0.434388  0.942609  0.847914 -1.481112  1.203859
3  -0.838867 -1.523949 -0.670761 -1.196524  0.623472
4  -0.783075 -0.694598 -0.232646 -0.317736  0.054314
..       ...       ...       ...       ...       ...
95 -1.015670 -0.374060 -0.840605  0.023194  1.332374
96  0.418332  1.260845  0.145041 -1.585094 -1.749945
97  1.206349 -1.196643 -1.715666  0.781166  0.709814
98  1.174557  0.042321  0.441976  0.041867 -0.606838
99 -0.757022  0.409584  0.216632 -1.102426  0.619365

[100 rows x 5 columns]


In [15]:
# Preview the first five rows (five is head()'s default, spelled out here).
df.head(n=5)

Unnamed: 0,a,b,c,d,e
0,1.135128,-0.389427,0.069412,1.02506,1.095139
1,-0.353668,1.751153,-0.281406,-1.042236,-0.713194
2,0.434388,0.942609,0.847914,-1.481112,1.203859
3,-0.838867,-1.523949,-0.670761,-1.196524,0.623472
4,-0.783075,-0.694598,-0.232646,-0.317736,0.054314


In [16]:
# Preview the first ten rows.
df.head(n=10)

Unnamed: 0,a,b,c,d,e
0,1.135128,-0.389427,0.069412,1.02506,1.095139
1,-0.353668,1.751153,-0.281406,-1.042236,-0.713194
2,0.434388,0.942609,0.847914,-1.481112,1.203859
3,-0.838867,-1.523949,-0.670761,-1.196524,0.623472
4,-0.783075,-0.694598,-0.232646,-0.317736,0.054314
5,0.757774,-0.308014,0.072281,0.419737,-0.214242
6,-0.575826,0.40436,0.964738,0.876069,-1.308593
7,0.374758,0.752128,2.068971,-1.167135,1.394655
8,0.479973,-1.136332,1.375154,1.229343,0.196209
9,-1.521759,-0.397597,3.171378,-0.633033,0.739386


In [17]:
# Preview the last five rows (five is tail()'s default, spelled out here).
df.tail(n=5)

Unnamed: 0,a,b,c,d,e
95,-1.01567,-0.37406,-0.840605,0.023194,1.332374
96,0.418332,1.260845,0.145041,-1.585094,-1.749945
97,1.206349,-1.196643,-1.715666,0.781166,0.709814
98,1.174557,0.042321,0.441976,0.041867,-0.606838
99,-0.757022,0.409584,0.216632,-1.102426,0.619365


In [19]:
# List the column labels of the DataFrame.
df.columns

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [18]:
# Print a concise summary: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   a       100 non-null    float64
 1   b       100 non-null    float64
 2   c       100 non-null    float64
 3   d       100 non-null    float64
 4   e       100 non-null    float64
dtypes: float64(5)
memory usage: 4.0 KB


In [8]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(100, 5)

In [20]:
# Upper-case the five column labels ("a" -> "A", ..., "e" -> "E").
# rename() returns a NEW DataFrame; the result is only displayed here and is
# not assigned, so `df` itself still has its lower-case column names.
df.rename(columns={label: label.upper() for label in "abcde"})

Unnamed: 0,A,B,C,D,E
0,1.135128,-0.389427,0.069412,1.025060,1.095139
1,-0.353668,1.751153,-0.281406,-1.042236,-0.713194
2,0.434388,0.942609,0.847914,-1.481112,1.203859
3,-0.838867,-1.523949,-0.670761,-1.196524,0.623472
4,-0.783075,-0.694598,-0.232646,-0.317736,0.054314
...,...,...,...,...,...
95,-1.015670,-0.374060,-0.840605,0.023194,1.332374
96,0.418332,1.260845,0.145041,-1.585094,-1.749945
97,1.206349,-1.196643,-1.715666,0.781166,0.709814
98,1.174557,0.042321,0.441976,0.041867,-0.606838


In [21]:
# Preview the first five rows of `df` again.
df.head(n=5)

Unnamed: 0,a,b,c,d,e
0,1.135128,-0.389427,0.069412,1.02506,1.095139
1,-0.353668,1.751153,-0.281406,-1.042236,-0.713194
2,0.434388,0.942609,0.847914,-1.481112,1.203859
3,-0.838867,-1.523949,-0.670761,-1.196524,0.623472
4,-0.783075,-0.694598,-0.232646,-0.317736,0.054314


In [22]:
# Import a dataset into Colab: the CSV is read from the Colab /content
# directory. Note that `df` is rebound here to the loaded data.
path = "/content/Ecommerce Customers.csv"
df = pd.read_csv(filepath_or_buffer=path)

In [23]:
# Preview the first five rows of the loaded CSV.
df.head(n=5)

Unnamed: 0,Email,Address,Avatar,Avg. Session Length,Time on App,Time on Website,Length of Membership,Yearly Amount Spent
0,mstephenson@fernandez.com,"835 Frank Tunnel\nWrightmouth, MI 82180-9605",Violet,34.497268,12.655651,39.577668,4.082621,587.951054
1,hduke@hotmail.com,"4547 Archer Common\nDiazchester, CA 06566-8576",DarkGreen,31.926272,11.109461,37.268959,2.664034,392.204933
2,pallen@yahoo.com,"24645 Valerie Unions Suite 582\nCobbborough, D...",Bisque,33.000915,11.330278,37.110597,4.104543,487.547505
3,riverarebecca@gmail.com,"1414 David Throughway\nPort Jason, OH 22070-1220",SaddleBrown,34.305557,13.717514,36.721283,3.120179,581.852344
4,mstephens@davidson-herman.com,"14023 Rodriguez Passage\nPort Jacobville, PR 3...",MediumAquaMarine,33.330673,12.795189,37.536653,4.446308,599.406092


In [25]:
# Load the IMDB CSV from the Colab /content directory into its own frame.
imdb_df = pd.read_csv(filepath_or_buffer="/content/imdb_data.csv")

In [26]:
# Preview the first five rows of the IMDB data.
imdb_df.head(n=5)

Unnamed: 0,id,belongs_to_collection,budget,genres,homepage,imdb_id,original_language,original_title,overview,popularity,...,release_date,runtime,spoken_languages,status,tagline,title,Keywords,cast,crew,revenue
0,1,"[{'id': 313576, 'name': 'Hot Tub Time Machine ...",14000000,"[{'id': 35, 'name': 'Comedy'}]",,tt2637294,en,Hot Tub Time Machine 2,"When Lou, who has become the ""father of the In...",6.575393,...,2/20/15,93.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The Laws of Space and Time are About to be Vio...,Hot Tub Time Machine 2,"[{'id': 4379, 'name': 'time travel'}, {'id': 9...","[{'cast_id': 4, 'character': 'Lou', 'credit_id...","[{'credit_id': '59ac067c92514107af02c8c8', 'de...",12314651
1,2,"[{'id': 107674, 'name': 'The Princess Diaries ...",40000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,tt0368933,en,The Princess Diaries 2: Royal Engagement,Mia Thermopolis is now a college graduate and ...,8.248895,...,8/6/04,113.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,It can take a lifetime to find true love; she'...,The Princess Diaries 2: Royal Engagement,"[{'id': 2505, 'name': 'coronation'}, {'id': 42...","[{'cast_id': 1, 'character': 'Mia Thermopolis'...","[{'credit_id': '52fe43fe9251416c7502563d', 'de...",95149435
2,3,,3300000,"[{'id': 18, 'name': 'Drama'}]",http://sonyclassics.com/whiplash/,tt2582802,en,Whiplash,"Under the direction of a ruthless instructor, ...",64.29999,...,10/10/14,105.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The road to greatness can take you to the edge.,Whiplash,"[{'id': 1416, 'name': 'jazz'}, {'id': 1523, 'n...","[{'cast_id': 5, 'character': 'Andrew Neimann',...","[{'credit_id': '54d5356ec3a3683ba0000039', 'de...",13092000
3,4,,1200000,"[{'id': 53, 'name': 'Thriller'}, {'id': 18, 'n...",http://kahaanithefilm.com/,tt1821480,hi,Kahaani,Vidya Bagchi (Vidya Balan) arrives in Kolkata ...,3.174936,...,3/9/12,122.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,,Kahaani,"[{'id': 10092, 'name': 'mystery'}, {'id': 1054...","[{'cast_id': 1, 'character': 'Vidya Bagchi', '...","[{'credit_id': '52fe48779251416c9108d6eb', 'de...",16000000
4,5,,0,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",,tt1380152,ko,마린보이,Marine Boy is the story of a former national s...,1.14807,...,2/5/09,118.0,"[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]",Released,,Marine Boy,,"[{'cast_id': 3, 'character': 'Chun-soo', 'cred...","[{'credit_id': '52fe464b9251416c75073b43', 'de...",3923970
