# DIFFERENT WAYS OF CREATING A DATAFRAME

## Heading number 2

### Heading number 3

In [2]:
import numpy as np
import pandas as pd

%config IPCompleter.greedy = True
# Suppress scientific notation
np.set_printoptions(suppress=True)

#### **FROM LIST OF LISTS**

In [2]:
# create a list of lists
data = [['Preben', 37], ['Mads', 34], ['Brede', 27]]

# name the columns
columns = ['Name', 'Age']

# create df
pd.DataFrame(data, columns=columns)

Unnamed: 0,Name,Age
0,Preben,37
1,Mads,34
2,Brede,27


# FROM DICTIONARY

In [3]:
# create dict
my_dict = {
    'Column one': [100, 200, 300],
    'Column two': [150, 300, 450]
}

pd.DataFrame(my_dict)

Unnamed: 0,Column one,Column two
0,100,150
1,200,300
2,300,450


# FROM NUMPY RANDOM MODULE

In [37]:
# set number of rows and columns
num_rows, num_columns = 4, 6

# create a numpy array
array = np.random.rand(num_rows, num_columns)

# set column names by using a list comprehension
column_names = [f'Column {x}' for x in range(1, num_columns + 1)]

# create dataframe
pd.DataFrame(array,  columns=column_names)

Unnamed: 0,Column 1,Column 2,Column 3,Column 4,Column 5,Column 6
0,0.649589,0.696302,0.878028,0.137129,0.565972,0.4246
1,0.846435,0.714315,0.288406,0.604404,0.234263,0.016862
2,0.360673,0.880504,0.785335,0.978225,0.228984,0.246319
3,0.590763,0.760698,0.766743,0.35581,0.555719,0.861458


## BUILD DATAFRAME FROM MULTIPLE EXCEL SHEETS

In [None]:
def open_multiple_sheets(path, sheets):
    li = []
    for sheet in sheets:
        df = pd.read_excel(path, sheet_name=sheet)
        li.append(df)
    return pd.concat(li, axis=0, ignore_index=True)

# BUILD DATAFRAME FROM MULTIPLE FILES (ROW-WISE)

In [None]:
from glob import glob
stock_files = sorted(glob('data/stocks*.csv'))
df = pd.concat((pd.read_csv(file) for file in stock_files), ignore_index=True)

# CREATE SERIES FROM LIST, DICT, NUMPY ARRAY

In [11]:
# Inputs
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

# create series from list
ser1 = pd.Series(mylist)

# create series from numpy array
ser2 = pd.Series(myarr)

# create series from dict
ser3 = pd.Series(mydict)
print(ser3.head())

a    0
b    1
c    2
e    3
d    4
dtype: int64


### How to assign name to the series’ index?

In [5]:
# Input
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

# Solution
ser.name = 'alphabets'
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object