# Making Dataframes

Examples of making pandas dataframes from data structured in various formats


---



In [1]:
import pandas as pd
import numpy as np
import json 

import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

ModuleNotFoundError: No module named 'pandas'

# 1) From CSV String

In [100]:
# Raw CSV text: a header row followed by one record per line. The text is kept
# free of leading indentation, because any spaces before a value would become
# part of the parsed column label or cell.
csv_string = "Name,Location,Points\nBob,NJ,42\nSally,NY,100"

# read_csv reads from a path or file-like object, so wrap the text in StringIO.
df = pd.read_csv(StringIO(csv_string), sep=",")

print(df)

        Name Location  Points
0        Bob       NJ      42
1      Sally       NY     100


# 2) From "Array of _Labeled_ JSON Objects"


Use this when each object in the JSON array has a label, i.e. an "index". Notice how this _index_ label is added as the index for each row in the dataframe.

Shows making a dataframe from a python dictionary and from a JSON string. 

Call `read_json` with the `orient` parameter set to `index`.



In [101]:

# The same labeled records, produced two ways: serialized from a Python
# dictionary with json.dumps, and written out directly as a JSON string.
dictionary_to_json = json.dumps({"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}})

string_of_json = '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'

# orient='index' turns each top-level key ("row 1", "row 2") into a row label.
# The JSON text is wrapped in StringIO so read_json receives a file-like
# object; newer pandas releases deprecate passing the literal string itself.
df_from_dictionary = pd.read_json(StringIO(dictionary_to_json), orient='index')

df_from_string = pd.read_json(StringIO(string_of_json), orient='index')


print(df_from_dictionary)
print('\n')
print(df_from_string)



      col 1 col 2
row 1     a     b
row 2     c     d


      col 1 col 2
row 1     a     b
row 2     c     d


# 3) From List of JSON Objects
---

Call `read_json` with the `orient` parameter set to `records`.



In [102]:
# A list of record objects (one dict per row), serialized to JSON text.
list_of_records = json.dumps([{
    "Name": "Bob",
    "Location": "NJ",
    "Points": 42
}])

# The same "records" layout written directly as a JSON string.
string_of_records = '[{"Name":"a","Location":"b"},{"Name":"c","Location":"d"}]'

# orient='records' reads each object in the array as one row.
# The JSON text is wrapped in StringIO so read_json receives a file-like
# object; newer pandas releases deprecate passing the literal string itself.
df_from_string = pd.read_json(StringIO(string_of_records), orient='records')

df_from_list = pd.read_json(StringIO(list_of_records), orient='records')

print(df_from_string)
print('\n')
print(df_from_list)



  Name Location
0    a        b
1    c        d


  Name Location  Points
0  Bob       NJ      42


In [0]:
# (Scratch cell cleared. It previously ran `d = f`, but `f` is never defined in
# this notebook, so the cell raised NameError and stopped "Restart & Run All".)

# 4) From "CSV-Style Arrays"

Nested arrays where the first array contains the column labels and each remaining array contains one row's data for all the columns.

In [104]:
# Row-oriented table: the first inner list is the header, and every
# following inner list is one record.
row_data_values = [
    ["Name", "Location", "Points"],
    ["Bob", "NJ", 42],
    ["Sally", "NY", 4100],
    ["Jim", "Ca Foo", 342.2],
    ["Tony Gozolez", "Oklahoma", 5000],
]

# Take the header row as the column labels, then drop it from the list in
# place so that only data rows remain in row_data_values.
titles = row_data_values[0]
del row_data_values[0]

df_from_csv_style_arrays = pd.DataFrame(data=row_data_values, columns=titles)

# Bare last expression: the notebook renders the frame as the cell output.
df_from_csv_style_arrays

Unnamed: 0,Name,Location,Points
0,Bob,NJ,42.0
1,Sally,NY,4100.0
2,Jim,Ca Foo,342.2
3,Tony Gozolez,Oklahoma,5000.0


# From a NumPy Array Is Cool Too!



In [105]:
# Build a 3x2 integer array, then label its two columns while wrapping it
# in a DataFrame.
numpy_data = np.array([
    [1, 2],
    [3, 4],
    [10, 11],
])
df = pd.DataFrame(numpy_data, columns=["column1", "column2"])

# Bare last expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,column1,column2
0,1,2
1,3,4
2,10,11


# 5) From CSV-Style "Column-Oriented Lists": Add the Transpose

In [106]:
# Column-oriented table: the first inner list holds the column labels, and
# each following inner list holds all the values for one column.
row_data_values = [
    ["Name", "Location", "Points"],
    ["Bob", "Sally", "Jim", "Tony Gozolez"],
    ["NJ", "NY", "California", "Las Vegas"],
    [100, 42, 342.2, 5000]
]

# Removes the label list from the original array, leaving only column data.
titles = row_data_values.pop(0)

# Each inner list is first loaded as a *row* labeled by its title; .T then
# flips the frame so those titles become the column labels.
# The intermediate frame mixes strings and numbers within each column, so
# after the transpose every column is object-typed. infer_objects() restores
# a numeric dtype for "Points".
df_from_csv_style_arrays = (
    pd.DataFrame(row_data_values, index=titles)
    .T
    .infer_objects()
)

# Bare last expression: the notebook renders the frame as the cell output.
df_from_csv_style_arrays

Unnamed: 0,Name,Location,Points
0,Bob,NJ,100.0
1,Sally,NY,42.0
2,Jim,California,342.2
3,Tony Gozolez,Las Vegas,5000.0
