In [104]:
import pandas as pd
import numpy as np
from io import StringIO
import requests

df = pd.DataFrame(data = np.arange(0,20).reshape(5,4),index =['Row1','Row2','Row3','Row4','Row5'],columns = ['Column1','Column2','Column3','Column4'])
print(df)


      Column1  Column2  Column3  Column4
Row1        0        1        2        3
Row2        4        5        6        7
Row3        8        9       10       11
Row4       12       13       14       15
Row5       16       17       18       19


In [85]:
df.head()


Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [86]:
df.tail()

Unnamed: 0,Column1,Column2,Column3,Column4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [87]:
type(df)

pandas.core.frame.DataFrame

In [88]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, Row1 to Row5
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Column1  5 non-null      int64
 1   Column2  5 non-null      int64
 2   Column3  5 non-null      int64
 3   Column4  5 non-null      int64
dtypes: int64(4)
memory usage: 200.0+ bytes


In [89]:
df.describe()

Unnamed: 0,Column1,Column2,Column3,Column4
count,5.0,5.0,5.0,5.0
mean,8.0,9.0,10.0,11.0
std,6.324555,6.324555,6.324555,6.324555
min,0.0,1.0,2.0,3.0
25%,4.0,5.0,6.0,7.0
50%,8.0,9.0,10.0,11.0
75%,12.0,13.0,14.0,15.0
max,16.0,17.0,18.0,19.0


In [90]:
#Indexing 
df[['Column1','Column2','Column3']] #By using Column Name
df.loc[['Row3','Row2']]  # Indexing using the Row
df.iloc[0:1,2:4]

Unnamed: 0,Column3,Column4
Row1,2,3


In [91]:
##convert the dataframe into arrays
df.iloc[0:1,2:4].values

array([[2, 3]])

In [92]:
#Operations
df.isnull().sum(axis = 1)


Row1    0
Row2    0
Row3    0
Row4    0
Row5    0
dtype: int64

In [93]:
df['Column1'].unique()

array([ 0,  4,  8, 12, 16])

In [94]:
#How to create the dataframe from a csv string data 
data = ('col1,col2,col3\n'
        'x,y,1\n'
        'a,b,2\n'
        'c,d,3')
data
df = pd.read_csv(StringIO(data),usecols = ['col1','col2','col3'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   col1    3 non-null      object
 1   col2    3 non-null      object
 2   col3    3 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [95]:
df.to_csv('test.csv')


PermissionError: [Errno 13] Permission denied: 'test.csv'

In [None]:
#Datatypes in csv
df = pd.read_csv('test.csv',dtype = {'col1' : object, 'col2' : object, 'col3' : int},index_col= 0)
df

#Working with the JSON Files

In [None]:
# data = '[{"index_name" : "1" , "name" : "Shivam", "age" : "30"},{"index_name" : "1" , "name" : "Shivam", "age" : "30"}]'
data = """{
  "row1": {"index_name": "1", "name": "Shivam", "age": "30"},
  "row2": {"index_name": "2", "name": "Rahul", "age": "25"}}"""
pd.read_json(data)


In [None]:
pd.read_json(StringIO(data),orient = "index")

In [None]:
data = pd.DataFrame([['Shivam',25],['Om',34]],columns = ['Name','Age'], index = ['Row1','Row2'])
data

In [None]:
data.to_json(orient = "index")

In [None]:
data.to_json(orient = "columns")

In [106]:
data = [{
    "employee": {
    "id": 101,
    "name": "Shivam",
    "age": 30,
    "address": {
      "street": "MG Road",
      "city": "Bengaluru",
      "state": "Karnataka",
      "pin": 560001
    },
  }},
        
  {"employee": {
    "id": 102,
    "name": "Rahul",
    "age": 28,
    "address": {
      "street": "Sector 62",
      "city": "Noida",
      "state": "Uttar Pradesh",
      "pin": 201301
    },
  }
}]

pd.json_normalize(data,"address",["street"])


KeyError: "Key 'address' not found. If specifying a record_path, all elements of data should have the path."

In [122]:
url = "https://en.wikipedia.org/wiki/Mobile_country_code"

# Add headers to mimic a real browser
headers = {"User-Agent": "Mozilla/5.0"}

response = requests.get(url, headers=headers)
# Now parse the HTML content
tables = pd.read_html(response.text)
print(type(tables))
print(type(tables[0]))

  tables = pd.read_html(response.text)


<class 'list'>
<class 'pandas.core.frame.DataFrame'>


In [121]:
df = pd.DataFrame(data = tables[0])
df
print(f"Found {len(tables)} tables")
print(type(tables[0]))
print(tables[0])
type(tables)


Unnamed: 0,MCC,MNC,Brand,Operator,Status,Bands (MHz),References and notes
0,1,1,TEST,Test network,Operational,any,
1,1,1,TEST,Test network,Operational,any,
2,999,99,,Internal use,Operational,any,"Internal use in private networks, no roaming[6]"
3,999,999,,Internal use,Operational,any,"Internal use in private networks, no roaming[6]"


In [112]:
print(tables[0])

   MCC  MNC Brand      Operator       Status Bands (MHz)  \
0    1    1  TEST  Test network  Operational         any   
1    1    1  TEST  Test network  Operational         any   
2  999   99   NaN  Internal use  Operational         any   
3  999  999   NaN  Internal use  Operational         any   

                              References and notes  
0                                              NaN  
1                                              NaN  
2  Internal use in private networks, no roaming[6]  
3  Internal use in private networks, no roaming[6]  


In [126]:
df = pd.read_xml("practice.xml")
print(df)

    id    name  age department         address
0  101  Shivam   30         IT  \n            
1  102   Rahul   28    Finance  \n            


In [124]:
type(df)

pandas.core.frame.DataFrame

In [129]:
xml = """<?xml version="1.0" encoding="UTF-8"?>
<company>
    <employee id="101">
        <name>Shivam</name>
        <age>30</age>
        <department>IT</department>
        <address>
            <street>MG Road</street>
            <city>Bengaluru</city>
            <state>Karnataka</state>
            <pin>560001</pin>
        </address>
    </employee>

    <employee id="102">
        <name>Rahul</name>
        <age>28</age>
        <department>Finance</department>
        <address>
            <street>Sector 62</street>
            <city>Noida</city>
            <state>Uttar Pradesh</state>
            <pin>201301</pin>
        </address>
    </employee>
</company>"""

pd.read_xml(StringIO(xml))


Unnamed: 0,id,name,age,department,address
0,101,Shivam,30,IT,\n
1,102,Rahul,28,Finance,\n


In [131]:
#Pickling and Unpickling the data object in python
import seaborn as sns

df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [132]:
import pickle

In [134]:
filename = 'file1.pkl'
pickle.dump(df,open(filename,'wb'))

In [135]:
data = pickle.load(open("file1.pkl",'rb'))
data

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [137]:
#Python Functions Positional and the the Keyword Arguments 

def check(*args,**kargs):
    print(args)
    print(kargs)

check(2,3,name = "Shivam",age = 30)

#

(2, 3)
{'name': 'Shivam', 'age': 30}


#In the above example 
#the postional arguments -> tuple form
#the keyword arguments -> dict form


Classes in Python and OOPS

In [142]:
class Car:
    def __init__(self,engine,tyres,windows):
        self.engine = engine
        self.tyres = tyres
        self.windows = windows
        
    def self_driving(self):
        print("The No of tyres in the car {},{}".format(self.tyres,self.engine))
        print("ghello from",self.engine,"No of Tyres",self.tyres)
        
Car1 = Car(4,4,3)
Car1.self_driving()

The No of tyres in the car 4,4
ghello from 4 No of Tyres 4
