# Pandas, Reading CSV file with various parameters

In [1]:
import pandas as pd
import numpy as np

In [7]:
df=pd.read_csv("chess.csv")
df.head()

Unnamed: 0,Piece,Value,Color,Position
0,Pawn,1,White,a2
1,Pawn,1,Black,a7
2,Knight,3,White,b1
3,Knight,3,Black,b8
4,Bishop,3,White,c1


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Piece     12 non-null     object
 1   Value     12 non-null     object
 2   Color     12 non-null     object
 3   Position  12 non-null     object
dtypes: object(4)
memory usage: 516.0+ bytes


In [11]:
df.describe()

Unnamed: 0,Piece,Value,Color,Position
count,12,12,12,12
unique,6,5,2,12
top,Pawn,3,White,a2
freq,2,4,6,1


In [12]:
df['Piece'].value_counts()

Piece
Pawn      2
Knight    2
Bishop    2
Rook      2
Queen     2
King      2
Name: count, dtype: int64

In [18]:
from io import StringIO, BytesIO

data=('c1,c2,c3\n'
      '1,2,3\n'
      '4,5,6')

In [16]:
type(data)

str

In [21]:
#read from specific columns
pd.read_csv(StringIO(data),usecols=['c1','c2'])

Unnamed: 0,c1,c2
0,1,2
1,4,5


In [29]:
df=pd.read_csv(StringIO(data), dtype={'c1':int,'c2':float, 'c3':'Int64'})
df

Unnamed: 0,c1,c2,c3
0,1,2.0,3
1,4,5.0,6


In [30]:
df['c1']

0    1
1    4
Name: c1, dtype: int32

In [31]:
#check data types
df.dtypes

c1      int32
c2    float64
c3      Int64
dtype: object

## Read Json to CSV

In [38]:
# Simplified resume data
data = {
    "name": "John Doe",
    "email": "john.doe@example.com",
    "phone": "+1234567890",
    "address": "123 Main St, Anytown, USA",
    "summary": "Experienced software developer with a strong background in web development and data science.",
    "degree_1": "Bachelor of Science in Computer Science",
    "institution_1": "University of Example",
    "year_1": 2020,
    "degree_2": "Master of Science in Data Science",
    "institution_2": "Example University",
    "year_2": 2022,
    "job_title_1": "Software Developer",
    "company_1": "Tech Solutions Inc.",
    "years_1": "2022-Present",
    "job_title_2": "Data Analyst Intern",
    "company_2": "Data Insights LLC",
    "years_2": "2020-2021",
    "skills": "Python, JavaScript, HTML/CSS, Pandas, NumPy, Git",
    "certification_1": "Certified Data Scientist",
    "certification_institution_1": "Data Science Academy",
    "certification_year_1": 2021,
    "project_1": "Web Development Portfolio",
    "project_description_1": "A collection of web development projects showcasing skills in HTML, CSS, and JavaScript.",
    "project_link_1": "https://github.com/johndoe/web-portfolio",
    "project_2": "Data Analysis Project",
    "project_description_2": "A project analyzing sales data to identify trends and insights.",
    "project_link_2": "https://github.com/johndoe/data-analysis"
}

In [44]:
# Convert JSON data to a DataFrame with an index
df = pd.DataFrame([data], index=[0])
df

Unnamed: 0,name,email,phone,address,summary,degree_1,institution_1,year_1,degree_2,institution_2,...,skills,certification_1,certification_institution_1,certification_year_1,project_1,project_description_1,project_link_1,project_2,project_description_2,project_link_2
0,John Doe,john.doe@example.com,1234567890,"123 Main St, Anytown, USA",Experienced software developer with a strong b...,Bachelor of Science in Computer Science,University of Example,2020,Master of Science in Data Science,Example University,...,"Python, JavaScript, HTML/CSS, Pandas, NumPy, Git",Certified Data Scientist,Data Science Academy,2021,Web Development Portfolio,A collection of web development projects showc...,https://github.com/johndoe/web-portfolio,Data Analysis Project,A project analyzing sales data to identify tre...,https://github.com/johndoe/data-analysis


In [45]:
# Save DataFrame to CSV
df.to_csv('simple_resume_data.csv', index=False)


In [49]:
#convert json to different json formats
df.to_json(orient='records')

'[{"name":"John Doe","email":"john.doe@example.com","phone":"+1234567890","address":"123 Main St, Anytown, USA","summary":"Experienced software developer with a strong background in web development and data science.","degree_1":"Bachelor of Science in Computer Science","institution_1":"University of Example","year_1":2020,"degree_2":"Master of Science in Data Science","institution_2":"Example University","year_2":2022,"job_title_1":"Software Developer","company_1":"Tech Solutions Inc.","years_1":"2022-Present","job_title_2":"Data Analyst Intern","company_2":"Data Insights LLC","years_2":"2020-2021","skills":"Python, JavaScript, HTML\\/CSS, Pandas, NumPy, Git","certification_1":"Certified Data Scientist","certification_institution_1":"Data Science Academy","certification_year_1":2021,"project_1":"Web Development Portfolio","project_description_1":"A collection of web development projects showcasing skills in HTML, CSS, and JavaScript.","project_link_1":"https:\\/\\/github.com\\/johndoe\

### Reading HTML content

In [60]:
# requires lxml module to run this 
url= 'https://www.fdic.gov/bank/individual/failed/banklist.html'
dfs=pd.read_html(url, match='State', header=0)
dfs[0]

Unnamed: 0,Bank Name,City,State,Cert,Aquiring Institution,Closing Date,Fund Sort ascending
0,Republic First Bank dba Republic Bank,Philadelphia,Pennsylvania,27332,"Fulton Bank, National Association","April 26, 2024",10546
1,Citizens Bank,Sac City,Iowa,8758,Iowa Trust & Savings Bank,"November 3, 2023",10545
2,Heartland Tri-State Bank,Elkhart,Kansas,25851,"Dream First Bank, N.A.","July 28, 2023",10544
3,First Republic Bank,San Francisco,California,59017,"JPMorgan Chase Bank, N.A.","May 1, 2023",10543
4,Signature Bank,New York,New York,57053,"Flagstar Bank, N.A.","March 12, 2023",10540
5,Silicon Valley Bank,Santa Clara,California,24735,First Citizens Bank & Trust Company,"March 10, 2023",10539
6,Almena State Bank,Almena,Kansas,15426,Equity Bank,"October 23, 2020",10538
7,First City Bank of Florida,Fort Walton Beach,Florida,16748,"United Fidelity Bank, fsb","October 16, 2020",10537
8,The First State Bank,Barboursville,West Virginia,14361,"MVB Bank, Inc.","April 3, 2020",10536
9,Ericson State Bank,Ericson,Nebraska,18265,Farmers and Merchants Bank,"February 14, 2020",10535


### Reading Excel files

In [61]:
# required openpyxl module to run this
df_excel=pd.read_excel('Excel_Sample.xlsx')
df_excel

Unnamed: 0,Fruits,Quantity,Cost
0,Apple,10,1.5
1,Banana,20,0.5
2,Cherry,15,2.0
3,Date,25,3.0
4,Elderberry,5,4.0
5,Fig,30,2.5
6,Grape,50,1.0
7,Honeydew,10,3.5
8,Kiwi,40,2.0
9,Lemon,35,1.5


## Pickling

In [62]:
df_excel.to_pickle('df_excel')

In [63]:
df=pd.read_pickle('df_excel')
df.head()

Unnamed: 0,Fruits,Quantity,Cost
0,Apple,10,1.5
1,Banana,20,0.5
2,Cherry,15,2.0
3,Date,25,3.0
4,Elderberry,5,4.0
