## importing data from a link by read csv function 

In [29]:
# https://github.com/dr5hn/countries-states-cities-database/blob/master/csv/translations.csv
import requests
from io import StringIO
import pandas as pd

# Get CSV from GitHub
url = "https://raw.githubusercontent.com/dr5hn/countries-states-cities-database/master/csv/translations.csv"
req = requests.get(url)
data = StringIO(req.text)

# Read into DataFrame
df = pd.read_csv(data)

# Save as TSV
df.to_csv("translations.tsv", sep="\t", index=False)
df
# print("TSV file saved as translations.tsv")

Unnamed: 0,place_id,place_type,language,translation
0,1,country,br,Afghanistan
1,1,country,ko,아프가니스탄
2,1,country,pt-BR,Afeganistão
3,1,country,pt,Afeganistão
4,1,country,nl,Afghanistan
...,...,...,...,...
4650,17,subregion,it,Europa occidentale
4651,17,subregion,zh-CN,西欧
4652,17,subregion,ru,Западная Европа
4653,17,subregion,uk,Західна Європа


## sep parameter

In [17]:
f = pd.read_csv('translations.tsv', sep="\t",names=["place_Id","Place_Type","Language","Translation"])  # name is used to add columns 
# name sep is seperator
print(f.head(20))

    place_Id  Place_Type  Language  Translation
0   place_id  place_type  language  translation
1          1     country        br  Afghanistan
2          1     country        ko       아프가니스탄
3          1     country     pt-BR  Afeganistão
4          1     country        pt  Afeganistão
5          1     country        nl  Afghanistan
6          1     country        hr   Afganistan
7          1     country        fa    افغانستان
8          1     country        de  Afghanistan
9          1     country        es   Afganistán
10         1     country        fr  Afghanistan
11         1     country        ja      アフガニスタン
12         1     country        it  Afghanistan
13         1     country     zh-CN          阿富汗
14         1     country        tr   Afganistan
15         1     country        ru   Афганистан
16         1     country        uk   Афганістан
17         1     country        pl   Afganistan
18         2     country        br        Åland
19         2     country        ko      

## headers parameter 

In [45]:
# name is used to add columns  name sep is seperator
f = pd.read_csv('translations.tsv', sep="\t",header=0) # start header form row 0
f

Unnamed: 0,place_id,place_type,language,translation
0,1,country,br,Afghanistan
1,1,country,ko,아프가니스탄
2,1,country,pt-BR,Afeganistão
3,1,country,pt,Afeganistão
4,1,country,nl,Afghanistan
...,...,...,...,...
4650,17,subregion,it,Europa occidentale
4651,17,subregion,zh-CN,西欧
4652,17,subregion,ru,Западная Европа
4653,17,subregion,uk,Західна Європа


### remove columns ata the time od 

In [47]:
pd.read_csv('translations.tsv',sep="\t",usecols=['place_id','language'])

Unnamed: 0,place_id,language
0,1,br
1,1,ko
2,1,pt-BR
3,1,pt
4,1,nl
...,...,...
4650,17,it
4651,17,zh-CN
4652,17,ru
4653,17,uk


## skip rows and n rows 

In [55]:
f=pd.read_csv("translations.tsv", sep="\t", skiprows=1) # we can skip rows
n=pd.read_csv("translations.tsv", sep="\t", nrows=1) # limit the rows to a numeber here only 1 row will come
n

Unnamed: 0,place_id,place_type,language,translation
0,1,country,br,Afghanistan


#### some files have diffrent encodeinf we can chnage it via  -> encoding="" parameter 

### some times some rows of a file can have more columns than the other rows like one row have 2 more columns due to this the pandas give error to avoid this error we can use  **error_bad_lines=False** then those lines will be skipped 

### chnage the data type of a columns 

In [64]:
pd.read_csv("translations.tsv", sep="\t",dtype={'place_id':float}).head(20) ## int is converted to float
# pd.read_csv("translations.tsv", sep="\t",dtype={'place_id':float}).head(20).info()  get info of data types

Unnamed: 0,place_id,place_type,language,translation
0,1.0,country,br,Afghanistan
1,1.0,country,ko,아프가니스탄
2,1.0,country,pt-BR,Afeganistão
3,1.0,country,pt,Afeganistão
4,1.0,country,nl,Afghanistan
5,1.0,country,hr,Afganistan
6,1.0,country,fa,افغانستان
7,1.0,country,de,Afghanistan
8,1.0,country,es,Afganistán
9,1.0,country,fr,Afghanistan


## convertors 

In [93]:
def rename(name):
    if name == "Afghanistan":
        return "AFG"
    else:
        return name

In [97]:
pd.read_csv("translations.tsv", sep="\t" ,converters={'translation': rename}) ## used to transform a particular column 

Unnamed: 0,place_id,place_type,language,translation
0,1,country,br,AFG
1,1,country,ko,아프가니스탄
2,1,country,pt-BR,Afeganistão
3,1,country,pt,Afeganistão
4,1,country,nl,AFG
...,...,...,...,...
4650,17,subregion,it,Europa occidentale
4651,17,subregion,zh-CN,西欧
4652,17,subregion,ru,Западная Европа
4653,17,subregion,uk,Західна Європа


## seperate data onto chunks 

In [101]:
chunks = pd.read_csv("translations.tsv", sep="\t",chunksize=2000)
for data in chunks:
    print(data.shape)

(2000, 4)
(2000, 4)
(655, 4)
