# DataFrame from file
Pandas can load data from many file formats, including Excel, CSV, JSON, and XML.

In [1]:
import pandas as pd

In [3]:
# `header` is the zero-based row number that holds the column names.
# Row 0 is also what pandas uses when the argument is left out (and no
# explicit `names` are given), so it is spelled out here only for clarity.
students = pd.read_csv("../Data/students.csv", header=0)
students.head(n=1)

Unnamed: 0,name,class,email
0,Adam,AI23,adam@gmail.com


In [22]:
autos = pd.read_json("../Data/autos.json")

# A notebook cell renders only its last expression, so each preview is handed
# to display() explicitly; as bare statements they would be computed and dropped.
display(autos.head())  # first five rows
display(autos.tail())  # last five rows
display(autos.sample(5, random_state=42))  # five random rows; fixed seed keeps re-runs reproducible
autos  # whole frame; pandas truncates the rendering according to the display options

Unnamed: 0,aspiration,body-style,bore,...,symboling,wheel-base,width
0,std,convertible,3.47,...,3,88.6,64.1
1,std,convertible,3.47,...,3,88.6,64.1
2,std,hatchback,2.68,...,1,94.5,65.5
3,std,sedan,3.19,...,2,99.8,66.2
4,std,sedan,3.19,...,2,99.4,66.4
...,...,...,...,...,...,...,...
200,std,sedan,3.78,...,-1,109.1,68.9
201,turbo,sedan,3.78,...,-1,109.1,68.8
202,std,sedan,3.58,...,-1,109.1,68.9
203,turbo,sedan,3.01,...,-1,109.1,68.9


In [21]:
# Limit how much of a frame is rendered: at most 6 columns and 10 rows.
pd.options.display.max_columns = 6
pd.options.display.max_rows = 10

In [27]:
# .shape is a (row count, column count) tuple.
rows = autos.shape[0]
columns = autos.shape[1]
print(f"Dataframe has {rows} rows, and {columns} columns.")

Dataframe has 205 rows, and 26 columns.


In [29]:
# Alternatively, without .shape: len() of the frame is its row count and
# len() of its column index is the column count.
rows, columns = len(autos), len(autos.columns)
print(f"Dataframe has {rows} rows, and {columns} columns.")

Dataframe has 205 rows, and 26 columns.


In [30]:
# Print a summary of the frame: index range plus each column's non-null count and dtype.
autos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   aspiration         205 non-null    object 
 1   body-style         205 non-null    object 
 2   bore               201 non-null    float64
 3   city-mpg           205 non-null    int64  
 4   compression-ratio  205 non-null    float64
 5   curb-weight        205 non-null    int64  
 6   drive-wheels       205 non-null    object 
 7   engine-location    205 non-null    object 
 8   engine-size        205 non-null    int64  
 9   engine-type        205 non-null    object 
 10  fuel-system        205 non-null    object 
 11  fuel-type          205 non-null    object 
 12  height             205 non-null    float64
 13  highway-mpg        205 non-null    int64  
 14  horsepower         203 non-null    float64
 15  length             205 non-null    float64
 16  make               205 non

In [11]:
# header=3: the column names are read from the row at zero-based position 3 of the sheet.
populationstats = pd.read_excel("../Data/befolkningsstatistik2022.xlsx", header=3)
# Transpose the three-row preview so the many year columns are listed as rows.
populationstats.head(3).T

Unnamed: 0,0,1,2
Unnamed: 0,Folkmängd 31 december,Män,Kvinnor
2022,10521556,5298324,5223232
2021,10452326,5260707,5191619
2020,10379295,5222847,5156448
2019,10327589,5195814,5131775
2018,10230185,5142438,5087747
2017,10120242,5082662,5037580
2016,9995153,5013347,4981806
2015,9851017,4930966,4920051
2014,9747355,4872240,4875115


## Convert to common formats

In [31]:
# Copy a text representation of the frame to the system clipboard (e.g. to paste into a spreadsheet).
# NOTE(review): needs a working clipboard backend; presumably fails on a headless machine - confirm.
autos.to_clipboard()

In [34]:
# Write the frame to disk. `index` is a boolean flag: False leaves the row
# index out of the file (the previous `index=None` only worked because None is falsy).
autos.to_csv("../Data/autos.csv", index=False)
# Called without a path, to_csv returns the CSV text instead of writing a file;
# here the row index is included as the first, unnamed column.
csv_string = autos.to_csv()
csv_string[:1000]

',aspiration,body-style,bore,city-mpg,compression-ratio,curb-weight,drive-wheels,engine-location,engine-size,engine-type,fuel-system,fuel-type,height,highway-mpg,horsepower,length,make,normalized-losses,num-of-cylinders,num-of-doors,peak-rpm,price,stroke,symboling,wheel-base,width\r\n0,std,convertible,3.47,21,9.0,2548,rwd,front,130,dohc,mpfi,gas,48.8,27,111.0,168.8,alfa-romero,,four,two,5000.0,13495.0,2.68,3,88.6,64.1\r\n1,std,convertible,3.47,21,9.0,2548,rwd,front,130,dohc,mpfi,gas,48.8,27,111.0,168.8,alfa-romero,,four,two,5000.0,16500.0,2.68,3,88.6,64.1\r\n2,std,hatchback,2.68,19,9.0,2823,rwd,front,152,ohcv,mpfi,gas,52.4,26,154.0,171.2,alfa-romero,,six,two,5000.0,16500.0,3.47,1,94.5,65.5\r\n3,std,sedan,3.19,24,10.0,2337,fwd,front,109,ohc,mpfi,gas,54.3,30,102.0,176.6,audi,164.0,four,four,5500.0,13950.0,3.4,2,99.8,66.2\r\n4,std,sedan,3.19,18,8.0,2824,4wd,front,136,ohc,mpfi,gas,54.3,22,115.0,176.6,audi,164.0,five,four,5500.0,17450.0,3.4,2,99.4,66.4\r\n5,std,sedan,3.19,19,8.5,2507,fwd,fr

In [37]:
# orient="records" serializes the frame row by row: a JSON array holding one
# {column: value} object per row. Other orient values (e.g. "columns", "index",
# "split") lay the same data out differently.
autos.to_json("../Data/new_autos.json", orient="records")

# Called without a path, to_json returns the JSON text; indent=4 pretty-prints
# it instead of producing one long line.
json_string = autos.to_json(orient="records", indent=4)
print(json_string[:200])

[
    {
        "aspiration":"std",
        "body-style":"convertible",
        "bore":3.47,
        "city-mpg":21,
        "compression-ratio":9.0,
        "curb-weight":2548,
        "drive-wheels":


In [39]:
# orient="records" yields a list with one {column: value} dict per row,
# so despite its name `my_dict` is a list; index 0 is the first row.
my_dict = autos.to_dict(orient="records")
my_dict[0]

{'aspiration': 'std',
 'body-style': 'convertible',
 'bore': 3.47,
 'city-mpg': 21,
 'compression-ratio': 9.0,
 'curb-weight': 2548,
 'drive-wheels': 'rwd',
 'engine-location': 'front',
 'engine-size': 130,
 'engine-type': 'dohc',
 'fuel-system': 'mpfi',
 'fuel-type': 'gas',
 'height': 48.8,
 'highway-mpg': 27,
 'horsepower': 111.0,
 'length': 168.8,
 'make': 'alfa-romero',
 'normalized-losses': nan,
 'num-of-cylinders': 'four',
 'num-of-doors': 'two',
 'peak-rpm': 5000.0,
 'price': 13495.0,
 'stroke': 2.68,
 'symboling': 3,
 'wheel-base': 88.6,
 'width': 64.1}