## How to build a DataFrame from multiple files (row-wise)?

In [1]:
import pandas as pd

In [2]:
# Load each daily CSV (';'-separated), keeping only the three named columns.
# None of the frames is assigned to a name, and a cell renders only its final
# expression, so the output below is the stocks3.csv frame.
pd.read_csv('stocks1.csv', usecols=['date', 'ticker', 'price'], sep=';')
pd.read_csv('stocks2.csv', usecols=['date', 'ticker', 'price'], sep=';')
pd.read_csv('stocks3.csv', usecols=['date', 'ticker', 'price'], sep=';')

Unnamed: 0,date,ticker,price
0,2020-12-03,AAPL,1345
1,2020-12-03,MSFT,333
2,2020-12-03,GOOG,231


In [3]:
#IMPORTANT: PANDAS
#glob #sorted #read #pandas
# glob() comes from the standard library's glob module: it expands a
# shell-style wildcard into the list of matching paths, here every file
# matching stocks*.csv.
# The order of that list is arbitrary, so it is sorted (in place; equivalent
# to wrapping the call in sorted()) to get a stable file order.
from glob import glob
stock_files = glob('stocks*.csv')
stock_files.sort()
stock_files

['stocks1.csv', 'stocks2.csv', 'stocks3.csv']

In [4]:
#IMPORTANT: PANDAS
#glob #sorted #index #row #row-wise #concat #read #pandas
# Read every file listed in stock_files with read_csv, then hand the resulting
# frames to concat, which stacks them on top of each other (row-wise) into
# a single df.
#
# ignore_index=True renumbers the rows of the result from 0 upwards.
# The commented variant below omits it and therefore keeps each file's
# own index:
#pd.concat((pd.read_csv(file, sep=';', usecols=['date','ticker','price']) for file in stock_files))
pd.concat(
    [
        pd.read_csv(file, usecols=['date', 'ticker', 'price'], sep=';')
        for file in stock_files
    ],
    ignore_index=True,
)

Unnamed: 0,date,ticker,price
0,2020-12-01,AAPL,1234
1,2020-12-01,MSFT,34
2,2020-12-01,GOOG,23
3,2020-12-02,AAPL,134
4,2020-12-02,MSFT,332
5,2020-12-02,GOOG,233
6,2020-12-03,AAPL,1345
7,2020-12-03,MSFT,333
8,2020-12-03,GOOG,231


## How to build a DataFrame from multiple files (col-wise)?

In [5]:
# Show the first file on its own for reference (';'-separated, three columns);
# the frame is only displayed, not stored.
pd.read_csv('stocks1.csv', sep=';', usecols=['date','ticker','price'])

Unnamed: 0,date,ticker,price
0,2020-12-01,AAPL,1234
1,2020-12-01,MSFT,34
2,2020-12-01,GOOG,23


In [6]:
# Show the second file on its own for reference (';'-separated, three columns);
# the frame is only displayed, not stored.
pd.read_csv('stocks2.csv', sep=';', usecols=['date','ticker','price'])

Unnamed: 0,date,ticker,price
0,2020-12-02,AAPL,134
1,2020-12-02,MSFT,332
2,2020-12-02,GOOG,233


In [7]:
# Sorted list of every file matching stocks*.csv. Note the name: stocks_files
# (with an extra "s") is a separate variable from the stock_files list built in
# the row-wise section; the col-wise concat below reads this one.
# Relies on `glob` having been imported by an earlier cell.
stocks_files = sorted(glob('stocks*.csv'))

In [8]:
#IMPORTANT: PANDAS
#glob #sorted #cols #col-wise #concat #read #pandas
# Read every file listed in stocks_files with read_csv, then hand the resulting
# frames to concat. With axis='columns' the frames are placed side by side
# (col-wise) instead of being stacked: rows are matched up by index label and
# each file contributes its own date/ticker/price columns.
pd.concat(
    [
        pd.read_csv(file, usecols=['date', 'ticker', 'price'], sep=';')
        for file in stocks_files
    ],
    axis='columns',
)

Unnamed: 0,date,ticker,price,date.1,ticker.1,price.1,date.2,ticker.2,price.2
0,2020-12-01,AAPL,1234,2020-12-02,AAPL,134,2020-12-03,AAPL,1345
1,2020-12-01,MSFT,34,2020-12-02,MSFT,332,2020-12-03,MSFT,333
2,2020-12-01,GOOG,23,2020-12-02,GOOG,233,2020-12-03,GOOG,231
