In [None]:
import pandas as pd
import numpy as np

# **CSV files**
> **A CSV (comma-separated values) file is a simple text file in which information is separated by commas.**

### **Loading a csv file**

In [None]:
# Read the Titanic training set from the local CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='titanic/train.csv')

In [None]:
# Preview the first five rows (five is the default for head()).
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### **Loading from a specific URL**

In [None]:
import requests
from io import StringIO

# Raw GitHub location of the Titanic training CSV.
url = "https://raw.githubusercontent.com/SharadSaha/ML_notebooks/main/Kaggle/Titanic_dataset/train.csv"
# An explicit User-Agent header is sent along with the request.
headers = {"User-Agent" : "PythonUser"}
# Bound the wait so an unresponsive server cannot hang the kernel.
req = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of handing an error page to the CSV parser.
req.raise_for_status()
# Wrap the response text in a file-like object that read_csv can consume.
data = StringIO(req.text)

In [None]:
# Rewind the in-memory buffer first so this cell can be re-run on its own:
# read_csv consumes the StringIO, and a second read starting at the end of
# the buffer would find no data to parse.
data.seek(0)
df_from_url = pd.read_csv(data)
df_from_url.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


### **Use a particular column as the index : (index_col)**

In [None]:
# Promote PassengerId from an ordinary column to the row index while loading.
d1 = pd.read_csv(
    'titanic/train.csv',
    index_col='PassengerId',
)
d1.head(n=4)

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


### **Set a particular row as the header : (header)**

In [None]:
# header=1 takes the column names from line 1 of the file (0-based), i.e. the
# first passenger record; the real header line above it is dropped.
d2 = pd.read_csv(
    'titanic/train.csv',
    header=1,
)
d2.head(n=4)

Unnamed: 0,1,0,3,"Braund, Mr. Owen Harris",male,22,1.1,0.1,A/5 21171,7.25,Unnamed: 10,S
0,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
1,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
2,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
3,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### **Use only specific columns for the dataset : (usecols)**

In [None]:
# Load only the listed columns. The result keeps the file's column order,
# not the order in which the names are given here.
d3 = pd.read_csv(
    'titanic/train.csv',
    usecols=['Name', 'Sex', 'Age', 'Survived'],
)
d3.head(n=4)

Unnamed: 0,Survived,Name,Sex,Age
0,0,"Braund, Mr. Owen Harris",male,22.0
1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0
2,1,"Heikkinen, Miss. Laina",female,26.0
3,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0


### **Skip specific rows while loading**

In [None]:
# skiprows accepts a callable that receives each line index and returns True
# for the lines to drop, so any skipping rule can be expressed. Index 0 is the
# header line, so 1, 3 and 5 remove the 1st, 3rd and 5th passenger records.
d4 = pd.read_csv(
    'titanic/train.csv',
    skiprows=lambda row_idx: row_idx in (1, 3, 5),
)
d4.head(n=4)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
1,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
2,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
3,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S


### **Read only n rows (nrows)**

In [None]:
# Stop parsing after the first three data rows, then confirm the row count.
d5 = pd.read_csv(
    'titanic/train.csv',
    nrows=3,
)
print(d5.shape[0])

3


### **Specify the file encoding and skip bad lines**

In [None]:
# Decode the file as latin-1 and silently drop malformed lines (for example
# rows with too many fields) instead of raising an error.
# on_bad_lines='skip' replaces the deprecated error_bad_lines=False, which
# emits a warning since pandas 1.3 and was removed in pandas 2.0.
d6 = pd.read_csv(
    'titanic/train.csv',
    encoding='latin-1',
    on_bad_lines='skip',
)
d6.head(4)



  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


### **Convert columns to specific data types : (dtype)**

In [None]:
# Force the Survived column to be parsed as float rather than the inferred
# integer type; the other columns keep their inferred dtypes.
d7 = pd.read_csv(
    'titanic/train.csv',
    dtype={'Survived': float},
)
d7.head(n=4)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


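### **Parse date strings as datetime64 objects : (parse_dates)**
> **Pass the names of the date columns, e.g. `pd.read_csv(path, parse_dates=['date_col'])`. The Titanic data has no date column, so no example is run here.**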

### **Apply a function to specific columns : (converters)**

In [None]:
def rename(name):
  """Abbreviate a sex label.

  Returns 'M' when `name` is exactly 'male' and 'F' for every other value.
  """
  return "M" if name == "male" else "F"

In [None]:
# Run every raw value of the Sex column through rename() while parsing, so
# the column arrives already abbreviated.
d8 = pd.read_csv(
    'titanic/train.csv',
    converters={'Sex': rename},
)
d8.head(n=4)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",M,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",F,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",F,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",F,35.0,1,0,113803,53.1,C123,S


### **Treat specific values as NA values : (na_values)**

In [None]:
# Treat the literal string 'male' as a missing value, in addition to the
# markers pandas already recognises as NA.
d9 = pd.read_csv(
    'titanic/train.csv',
    na_values=['male'],
)
d9.head(n=4)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


### **Load dataset in chunks**

In [None]:
# With chunksize set, read_csv returns an iterator that yields DataFrames of
# at most 200 rows each instead of loading the whole file at once.
d10 = pd.read_csv(
    'titanic/train.csv',
    chunksize=200,
)

In [None]:
# Walk through the chunked reader d10 and print each chunk's (rows, columns)
# shape.
# NOTE(review): d10 is presumably a one-shot iterator — re-create it before
# re-running this cell, otherwise the loop body will not execute again.
for chunk in d10:
  print(chunk.shape)

(200, 12)
(200, 12)
(200, 12)
(200, 12)
(91, 12)


# **JSON data**
> **JSON (JavaScript Object Notation) is an open standard file format and data interchange format that uses human-readable text to store and transmit data objects consisting of attribute–value pairs and arrays (or other serializable values). It is a common data format with diverse uses in electronic data interchange, including that of web applications with servers.**

### **Read json data from file**

In [None]:
# Load the recipe records from the local JSON file.
# Note: this rebinds `df`, which held the Titanic frame earlier in the notebook.
df = pd.read_json(path_or_buf='recipe.json')
df.head(n=4)

Unnamed: 0,id,ingredients
0,18009,"[baking powder, eggs, all-purpose flour, raisi..."
1,28583,"[sugar, egg yolks, corn starch, cream of tarta..."
2,41580,"[sausage links, fennel bulb, fronds, olive oil..."
3,29752,"[meat cuts, file powder, smoked sausage, okra,..."


### **Read JSON from a URL**

In [None]:
# Fetch NOAA's global land and ocean temperature-anomaly series (1880-2016)
# directly from its JSON endpoint.
df_json_url = pd.read_json(
    path_or_buf="https://www.ncdc.noaa.gov/cag/global/time-series/globe/land_ocean/ytd/12/1880-2016.json"
)

In [None]:
# Preview the first four rows of the frame built from the remote JSON.
df_json_url.head(n=4)

Unnamed: 0,description,data
title,"Global Land and Ocean Temperature Anomalies, J...",
units,Degrees Celsius,
base_period,1901-2000,
missing,-999,
