In [3]:
import pandas as pd
import numpy as np
from io import StringIO
import requests
from dataclasses import dataclass

df = pd.DataFrame(data = np.arange(0,20).reshape(5,4),index =['Row1','Row2','Row3','Row4','Row5'],columns = ['Column1','Column2','Column3','Column4'])
print(df)


      Column1  Column2  Column3  Column4
Row1        0        1        2        3
Row2        4        5        6        7
Row3        8        9       10       11
Row4       12       13       14       15
Row5       16       17       18       19


In [4]:
df.head()


Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [5]:
df.tail()

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [6]:
type(df)

pandas.core.frame.DataFrame

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Row1 to Row5
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Column1  5 non-null      int64
 1   Column2  5 non-null      int64
 2   Column3  5 non-null      int64
 3   Column4  5 non-null      int64
dtypes: int64(4)
memory usage: 200.0+ bytes


In [8]:
df.describe()

Unnamed: 0,Column1,Column2,Column3,Column4
count,5.0,5.0,5.0,5.0
mean,8.0,9.0,10.0,11.0
std,6.324555,6.324555,6.324555,6.324555
min,0.0,1.0,2.0,3.0
25%,4.0,5.0,6.0,7.0
50%,8.0,9.0,10.0,11.0
75%,12.0,13.0,14.0,15.0
max,16.0,17.0,18.0,19.0


In [9]:
#Indexing 
df[['Column1','Column2','Column3']] #By using Column Name
df.loc[['Row3','Row2']]  # Indexing using the Row
df.iloc[0:1,2:4]

Unnamed: 0,Column3,Column4
Row1,2,3


In [10]:
##convert the dataframe into arrays
df.iloc[0:1,2:4].values

array([[2, 3]])

In [11]:
#Operations
df.isnull().sum(axis = 1)


Row1    0
Row2    0
Row3    0
Row4    0
Row5    0
dtype: int64

In [12]:
df['Column1'].unique()

array([ 0,  4,  8, 12, 16])

In [13]:
#How to create the dataframe from a csv string data 
data = ('col1,col2,col3\n'
        'x,y,1\n'
        'a,b,2\n'
        'c,d,3')
data
df = pd.read_csv(StringIO(data),usecols = ['col1','col2','col3'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   col1    3 non-null      object
 1   col2    3 non-null      object
 2   col3    3 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [14]:
df.to_csv('test.csv')


In [15]:
#Datatypes in csv
df = pd.read_csv('test.csv',dtype = {'col1' : object, 'col2' : object, 'col3' : int},index_col= 0)
df

Unnamed: 0,col1,col2,col3
0,x,y,1
1,a,b,2
2,c,d,3


#Working with the JSON Files

In [16]:
# data = '[{"index_name" : "1" , "name" : "Shivam", "age" : "30"},{"index_name" : "1" , "name" : "Shivam", "age" : "30"}]'
data = """{
  "row1": {"index_name": "1", "name": "Shivam", "age": "30"},
  "row2": {"index_name": "2", "name": "Rahul", "age": "25"}}"""
pd.read_json(data)


  pd.read_json(data)


Unnamed: 0,row1,row2
index_name,1,2
name,Shivam,Rahul
age,30,25


In [17]:
pd.read_json(StringIO(data),orient = "index")

Unnamed: 0,index_name,name,age
row1,1,Shivam,30
row2,2,Rahul,25


In [18]:
data = pd.DataFrame([['Shivam',25],['Om',34]],columns = ['Name','Age'], index = ['Row1','Row2'])
data

Unnamed: 0,Name,Age
Row1,Shivam,25
Row2,Om,34


In [19]:
data.to_json(orient = "index")

'{"Row1":{"Name":"Shivam","Age":25},"Row2":{"Name":"Om","Age":34}}'

In [20]:
data.to_json(orient = "columns")

'{"Name":{"Row1":"Shivam","Row2":"Om"},"Age":{"Row1":25,"Row2":34}}'

In [21]:
data = [{
    "employee": {
    "id": 101,
    "name": "Shivam",
    "age": 30,
    "address": {
      "street": "MG Road",
      "city": "Bengaluru",
      "state": "Karnataka",
      "pin": 560001
    },
  }},
        
  {"employee": {
    "id": 102,
    "name": "Rahul",
    "age": 28,
    "address": {
      "street": "Sector 62",
      "city": "Noida",
      "state": "Uttar Pradesh",
      "pin": 201301
    },
  }
}]

pd.json_normalize(data,"address",["street"])


KeyError: "Key 'address' not found. If specifying a record_path, all elements of data should have the path."

In [None]:
url = "https://en.wikipedia.org/wiki/Mobile_country_code"

# Add headers to mimic a real browser
headers = {"User-Agent": "Mozilla/5.0"}

response = requests.get(url, headers=headers)
# Now parse the HTML content
tables = pd.read_html(response.text)
print(type(tables))
print(type(tables[0]))

In [None]:
df = pd.DataFrame(data = tables[0])
df
print(f"Found {len(tables)} tables")
print(type(tables[0]))
print(tables[0])
type(tables)


In [None]:
print(tables[0])

In [None]:
df = pd.read_xml("practice.xml")
print(df)

In [None]:
type(df)

In [None]:
xml = """<?xml version="1.0" encoding="UTF-8"?>
<company>
    <employee id="101">
        <name>Shivam</name>
        <age>30</age>
        <department>IT</department>
        <address>
            <street>MG Road</street>
            <city>Bengaluru</city>
            <state>Karnataka</state>
            <pin>560001</pin>
        </address>
    </employee>

    <employee id="102">
        <name>Rahul</name>
        <age>28</age>
        <department>Finance</department>
        <address>
            <street>Sector 62</street>
            <city>Noida</city>
            <state>Uttar Pradesh</state>
            <pin>201301</pin>
        </address>
    </employee>
</company>"""

pd.read_xml(StringIO(xml))


In [None]:
#Pickling and Unpickling the data object in python
import seaborn as sns

df = sns.load_dataset('tips')
df.head()

In [None]:
import pickle

In [None]:
filename = 'file1.pkl'
pickle.dump(df,open(filename,'wb'))

In [None]:
data = pickle.load(open("file1.pkl",'rb'))
data

In [None]:
#Python Functions Positional and the the Keyword Arguments 

def check(*args,**kargs):
    print(args)
    print(kargs)

check(2,3,name = "Shivam",age = 30)

#

#In the above example 
#the postional arguments -> tuple form
#the keyword arguments -> dict form


Classes in Python and OOPS

In [None]:
class Car:
    def __init__(self,engine,tyres,windows):
        self.engine = engine
        self.tyres = tyres
        self.windows = windows
        
    def self_driving(self):
        print("The No of tyres in the car {},{}".format(self.tyres,self.engine))
        print("ghello from",self.engine,"No of Tyres",self.tyres)
        
Car1 = Car(4,4,3)
Car1.self_driving()
print(Car1)

Inheritance in the child class

In [None]:
class Car:
    def __init__(self,engines,tyres,windows):
        self.engines = engines
        self.tyres = tyres
        self.windows = windows
        
    def self_driving(self):
        print("The type of engine in the car is {}".format(self.engines))

class Audi(Car):
    def __init__(self,engines,tyres,windows,horsepower):
        super().__init__(self,engines,tyres,windows)
        self.horsepower = horsepower
    def self_driving(self):
        
        

Car1 = Car("petrol",4,3)
Car1.self_driving()
Audi1 = Audi("electric",4,3,800)


In [None]:
#List Comprehension
list1 = [1,2,3,4,5]
list2 = [i**2 for i in list1]
print(list2)

In [None]:
list3 = [i for i in list1 if i % 2 == 0 ]
print(list3)

In [None]:
list4 = [[1,2,3],[4,5,6]]
list5 = [item for sublist in list4 for item in sublist]
print(list5)

In [None]:
list6 = [i**2 for i in list5 if i % 2 == 0]
print(list6)

#Lambda Functions


In [None]:
f = lambda x,y : x + y
f

In [None]:
f(3,5)

In [None]:
f1 = lambda x : len(x)
print(f1("My name is Shivam"))


In [None]:
lst = [1,2,3,4,5]
f2 = list(map(lambda x : x**2,lst))
print(f2)


In [None]:
f3 = list(filter(lambda x : x % 2 == 0, lst))
f3

In [None]:
fruits = ["apple","banana","cherry","date"]
f4 = list(sorted(fruits,key=lambda x : len(x)))
print(f4)

In [None]:
data = {'a' : 5 , 'b' : 6, 'c' : 1}
data1 = max(data,key = lambda x : data[x])
print(data1)

In [None]:
#Data Classes
from dataclasses import dataclass

@dataclass
class Person:
    name : str
    age : int
    profession : str

@dataclass
class student(Person):
    marks : list

std1 = student("Shivam",21,"Student",[10,9,8,10])
print(std1.marks)

In [24]:
@dataclass
class Address:
    street : str
    city : str
    pin_code : int

@dataclass
class Person(): 
    name : str
    age : int
    address : Address

address1 = Address("D-204","Prashant Vihar",110085)
p1 = Person("Shivam",20,address1)
print(p1.name)


    
    

Shivam


In [27]:
#Learn about the access specifiers like public, private, protected
#__name -> private
#_name -> protected

class Person():
    name : str
    age : int
    def __init__(self,name,age):
        self.__name = name
        self.__age = age      #This is  private 
    def getInfo(self):
        print(f"{self.__name} has age of {self.__age}")

p2 = Person("Shivam",20)
p2.getInfo()


Shivam has 20 age
None


In [31]:
class Person():
    name : str
    age : int
    def __init__(self,name,age):
        self._name = name
        self._age = age

class Student(Person):
    def __init__(self,name,age):
        super().__init__(name,age)   #This is Protected
    def getInfo(self):
        print(f"{self._name} has age of {self._age}")
        

p2 = Person("Shivam",20)
# p2.getInfo()

s1 = Student("Akanksha",23)
s1.getInfo()
s1.name

Akanksha has age of 23


AttributeError: 'Student' object has no attribute 'name'