# Create DataFrame

In [3]:
import numpy as np
import pandas as pd

## From other objects

### From list

In [4]:
# Build a single-column DataFrame from a flat (one-dimensional) list.
flat_values = [10, 20, 30]
column_labels = ['col1']
df = pd.DataFrame(data=flat_values, columns=column_labels)
df

Unnamed: 0,col1
0,10
1,20
2,30


In [None]:
# Build a DataFrame from a list of lists: each inner list becomes one row.
rows = [[10, 20, 30], [4, 7, 9]]
column_labels = ['col1', 'col2', 'col3']
df = pd.DataFrame(data=rows, columns=column_labels)

df

### From NumPy arrays

In [None]:
# Build a DataFrame from a two-dimensional ndarray (3 rows x 2 columns),
# supplying explicit row and column labels.
ndarr = np.arange(1, 7).reshape(3, 2)
row_labels = ['row1', 'row2', 'row3']
column_labels = ['col1', 'col2']
df = pd.DataFrame(data=ndarr, index=row_labels, columns=column_labels)

# Show the raw array and the labelled frame one after the other for comparison.
print(f'ndarr is : \n{ndarr}\n')
print(f'df is: \n{df}')

### From Python dictionary

In [None]:
# Build a DataFrame from a dictionary whose values are equal-length lists:
# each key becomes a column label and each list supplies that column's values.
prices_dict = dict(
    fruits=["apples", "oranges", "bananas", "strawberries"],
    prices=[1.5, 2, 2.5, 3],
    suppliers=["supplier1", "supplier2", "supplier4", "supplier3"],
)

# Label the rows 1..4 explicitly.
row_labels = [1, 2, 3, 4]
prices_df = pd.DataFrame(data=prices_dict, index=row_labels)
prices_df

## Load data from multiple file formats

http://pandas.pydata.org/pandas-docs/stable/io.html

### From CSV files

In [None]:
# Load the drinks dataset; the first row of the file supplies the column labels.
# The path uses the same "../../../datasets" root as the executed cells further
# down (nrows, Excel and JSON examples) so that all loading cells agree.
# sep="," is read_csv's default delimiter and is spelled out only for illustration.
csv_df = pd.read_csv("../../../datasets/various/drinks.csv", sep=",")

csv_df.head(5)

#### header parameter

By default, the first row of the CSV file is used as the header (column labels). If the file contains only data, with no header row, we tell `read_csv` not to look for one by passing `header=None`:

In [None]:
# header=None tells read_csv that the file has no header row.
# Note: drinks.csv does have a header row, so with header=None that row is
# treated as ordinary data and appears as the first row of the output.
# The path uses the same "../../../datasets" root as the executed cells further down.
csv_df = pd.read_csv("../../../datasets/various/drinks.csv", sep=",", header=None)
csv_df.head(5)

#### names parameter

We can pass a list of column names to use.
Note that if the CSV already has a header row, we must explicitly pass ``header=0`` so that the existing names are replaced.

In [None]:
# Replace the file's own column labels with custom ones.
# header=0 marks the first row as the header so that it is replaced by `names`
# instead of being kept as a data row.
# The path uses the same "../../../datasets" root as the executed cells further down.
csv_df = pd.read_csv(
    "../../../datasets/various/drinks.csv",
    header=0,
    names=['A', 'B', 'C', 'D', 'E', 'F'],
)
csv_df.head(3)

#### Loading big files - nrows parameter

The `nrows` parameter sets the number of rows of the file to read. It is useful for reading pieces of large files.

Other useful parameters are <b>chunksize</b> and <b>iterator</b>

In [8]:
# Read only the first five rows of the file instead of loading all of it.
drinks_csv_path = "../../../datasets/various/drinks.csv"
csv_df = pd.read_csv(drinks_csv_path, nrows=5)
csv_df

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,AS
1,Albania,89,132,54,4.9,EU
2,Algeria,25,0,14,0.7,AF
3,Andorra,245,138,312,12.4,EU
4,Angola,217,57,45,5.9,AF


### From Excel files

In [7]:
# Load the IMDB ratings workbook into a DataFrame and preview its first rows.
imdb_xlsx_path = '../../../datasets/imdb/imdb_movie_ratings_top_1000_genre_sorted.xlsx'
xls_df = pd.read_excel(imdb_xlsx_path)
xls_df.head()

Unnamed: 0,star_rating,title,content_rating,genre,duration,actors_list
0,9.0,The Dark Knight,PG-13,Action,152,"[u'Christian Bale', u'Heath Ledger', u'Aaron E..."
1,8.8,Inception,PG-13,Action,148,"[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'..."
2,8.8,Star Wars: Episode V - The Empire Strikes Back,PG,Action,124,"[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi..."
3,8.7,Star Wars,PG,Action,121,"[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi..."
4,8.7,The Matrix,R,Action,136,"[u'Keanu Reeves', u'Laurence Fishburne', u'Car..."


### From JSON files

In [6]:
# Load the Python books JSON file into a DataFrame and preview its first rows.
books_json_path = '../../../datasets/python_books/books.json'
json_df = pd.read_json(books_json_path)
json_df.head()

Unnamed: 0,title,author,authorUrl,level
0,How to Make Mistakes in Python,Mike Pirnat,http://mike.pirnat.com/,Intermediate
1,Functional Programming in Python,David Mertz,http://www.oreilly.com/programming/free/functi...,Intermediate
2,Picking a Python Version: A Manifesto,David Mertz,http://www.oreilly.com/programming/free/from-f...,Beginner
3,Python para Desenvolvedores (2nd Edition),Luiz Eduardo Borges,http://ark4n.wordpress.com/python/,Intermediate
4,Intermediate Python,Muhammad Yasoob,http://pythontips.com/,Intermediate
