## Creating DataFrames in Pandas

##### From a Dictionary of Lists

In [3]:
import pandas as pd

# Column-oriented input: each key is a column label and each list holds
# that column's values, in row order.
data = dict(
    Name=["Hannan", "Mohid", "Hassan", "Talha"],
    Age=[20, 30, 22, 26],
    Score=[120, 102, 98, 112],
)

# No index is passed, so the rows get the default 0..n-1 integer labels.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Score
0,Hannan,20,120
1,Mohid,30,102
2,Hassan,22,98
3,Talha,26,112


##### From a List of Dictionaries

In [4]:
import pandas as pd

# Row-oriented input: one dict per record; the dict keys supply the column labels.
data = [
    dict(Name="Hannan", Age=22, City="Faisalabad"),
    dict(Name="Talha", Age=28, City="Islamabad"),
    dict(Name="Mohid", Age=26, City="Lahore"),
]
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Hannan,22,Faisalabad
1,Talha,28,Islamabad
2,Mohid,26,Lahore


##### From a List of Lists (with Column Names)

In [5]:
import pandas as pd

# Each inner list is one row; the values carry no labels of their own,
# so the column names are passed separately via `columns`.
data = [
    ["Ali", 20, "Karachi"],
    ["Hassan", 25, "Lahore"],
    ["John", 24, "New York"],
]

df = pd.DataFrame(data=data, columns=["Name", "Age", "City"])
df

Unnamed: 0,Name,Age,City
0,Ali,20,Karachi
1,Hassan,25,Lahore
2,John,24,New York


##### From a Dictionary of Series

In [6]:
import pandas as pd

# Two Series that share the same row labels ('a', 'b', 'c').
s1 = pd.Series([20, 30, 43], index=list("abc"))
s2 = pd.Series(["Lahore", "Karachi", "Islamabad"], index=list("abc"))

# Each dict key becomes a column; the Series index becomes the row index.
df = pd.DataFrame(dict(Age=s1, City=s2))
df

Unnamed: 0,Age,City
a,20,Lahore
b,30,Karachi
c,43,Islamabad


##### From NumPy Arrays

In [7]:
import pandas as pd
import numpy as np

# 3x3 integer matrix holding 1..9 in row-major order.
data = np.arange(1, 10).reshape(3, 3)

# A 2-D array has no column labels, so they are supplied explicitly.
df = pd.DataFrame(data, columns=["col1", "col2", "col3"])
df

Unnamed: 0,col1,col2,col3
0,1,2,3
1,4,5,6
2,7,8,9


##### Creating Empty DataFrames

In [8]:
import pandas as pd

# Only column labels are given, so the frame has a header but zero rows.
df = pd.DataFrame(
    columns=["Name", "Age", "City"],
)
df

Unnamed: 0,Name,Age,City


##### Creating Empty DataFrames with Index

In [9]:
import pandas as pd

# Column labels plus row labels but no data: the frame is 3x3 and
# every cell is a missing value.
df = pd.DataFrame(
    index=["id1", "id2", "id3"],
    columns=["Name", "Age", "City"],
)
df

Unnamed: 0,Name,Age,City
id1,,,
id2,,,
id3,,,


##### From Python Lists

In [11]:
import pandas as pd

# Plain nested Python lists: each inner [name, age] pair is one row.
data = [["Ali", 20], ["Bob", 22], ["Jack", 25]]
df = pd.DataFrame(data=data, columns=["Name", "Age"])
df

Unnamed: 0,Name,Age
0,Ali,20
1,Bob,22
2,Jack,25


In [12]:
import pandas as pd

# Read the Excel workbook 'data.xlsx' into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_excel('data.xlsx')
# Bare last expression: the notebook renders the loaded frame.
df

Unnamed: 0,Name,School,Marks
0,Hannna,ARR,32
1,Mohid,RRR,44
2,Hassan,SSS,55
3,Talha,SES,66
4,Ali,AWE,54
5,Abdullah,ASD,56
6,Jack,AFG,76
7,Bob,GFF,88
8,John,RTF,98
9,Smith,GFE,61


In [13]:
# Read the comma-separated file 'data.csv' (relative path) into a DataFrame,
# replacing whatever `df` held before.
df = pd.read_csv('data.csv')
# Bare last expression: the notebook renders the loaded frame.
df

Unnamed: 0,Name,School,Marks
0,Hannna,ARR,32
1,Mohid,RRR,44
2,Hassan,SSS,55
3,Talha,SES,66
4,Ali,AWE,54
5,Abdullah,ASD,56
6,Jack,AFG,76
7,Bob,GFF,88
8,John,RTF,98
9,Smith,GFE,61


In [14]:
# Read the JSON file 'data.json' (relative path) into a DataFrame,
# replacing whatever `df` held before.
df = pd.read_json('data.json')
# Bare last expression: the notebook renders the loaded frame.
df

Unnamed: 0,ID,Name,Age,City,Marks
0,1,Ali,20,Lahore,85
1,2,Sara,22,Karachi,90
2,3,Hassan,21,Islamabad,78
3,4,Ayesha,23,Multan,88


In [15]:
# Raw GitHub URL of tips.csv in the seaborn-data repository (master branch).
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"

# read_csv is handed the URL string directly instead of a local path,
# so running this cell requires network access.
df = pd.read_csv(url)
# Bare last expression: the notebook renders the loaded frame.
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [17]:
# Write the current `df` to 'download_data.csv' in the working directory.
# index=False leaves the row-index column out of the written file.
# NOTE(review): `df` is whatever the most recently executed cell assigned;
# presumably the tips data loaded from the URL. Confirm before relying on it.
df.to_csv('download_data.csv', index=False)

In [37]:
# Load 'students.csv' (relative path) into `df`; the inspection cells that
# follow (head, tail, info, describe, ...) all operate on this frame.
df = pd.read_csv('students.csv')
# Bare last expression: the notebook renders the loaded frame.
df

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [38]:
# Preview the first five rows (five is also the default when n is omitted).
df.head(n=5)

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
0,1,Ali,20,M,Lahore,78,65,72
1,2,Sara,22,F,Karachi,88,90,85
2,3,Hassan,21,M,Islamabad,67,70,75
3,4,Ayesha,23,F,Multan,92,95,89
4,5,Omar,20,M,Lahore,55,60,58


In [39]:
# Preview the last three rows; the original row labels are kept.
df.tail(n=3)

Unnamed: 0,ID,Name,Age,Gender,City,Math,English,Science
5,6,Zara,22,F,Karachi,81,85,79
6,7,Usman,21,M,Islamabad,73,68,70
7,8,Maria,23,F,Multan,89,92,94


In [40]:
# Print a structural summary of the frame: index range, each column's
# non-null count and dtype, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 8 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   ID       8 non-null      int64 
 1   Name     8 non-null      object
 2   Age      8 non-null      int64 
 3   Gender   8 non-null      object
 4   City     8 non-null      object
 5   Math     8 non-null      int64 
 6   English  8 non-null      int64 
 7   Science  8 non-null      int64 
dtypes: int64(5), object(3)
memory usage: 644.0+ bytes


In [41]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text columns Name, Gender and City are left out of the result.
df.describe()

Unnamed: 0,ID,Age,Math,English,Science
count,8.0,8.0,8.0,8.0,8.0
mean,4.5,21.5,77.875,78.125,77.75
std,2.44949,1.195229,12.540648,13.809288,11.5604
min,1.0,20.0,55.0,60.0,58.0
25%,2.75,20.75,71.5,67.25,71.5
50%,4.5,21.5,79.5,77.5,77.0
75%,6.25,22.25,88.25,90.5,86.0
max,8.0,23.0,92.0,95.0,94.0


In [43]:
# The column labels of the frame, returned as a pandas Index.
df.columns

Index(['ID', 'Name', 'Age', 'Gender', 'City', 'Math', 'English', 'Science'], dtype='object')

In [44]:
# Dimensions of the frame as a (number of rows, number of columns) tuple.
df.shape

(8, 8)

In [45]:
# Count how many rows carry each distinct value of the 'Gender' column.
print(df['Gender'].value_counts())

Gender
M    4
F    4
Name: count, dtype: int64


In [46]:
# Selecting a single column with [] yields a Series; print shows its
# values together with the row labels, the Series name and the dtype.
print(df['Name'])

0       Ali
1      Sara
2    Hassan
3    Ayesha
4      Omar
5      Zara
6     Usman
7     Maria
Name: Name, dtype: object


In [47]:
# Count how many rows carry each distinct value of the 'Age' column.
print(df['Age'].value_counts())

Age
20    2
22    2
21    2
23    2
Name: count, dtype: int64


In [49]:
# duplicated() flags every row that repeats an earlier row across all columns;
# summing the boolean flags gives the number of duplicate rows.
print(df.duplicated().sum())

0


In [50]:
# Distinct values of the 'Age' column as an array, in order of first appearance.
print(df['Age'].unique())

[20 22 21 23]
