In [4]:
import pandas as pd
import numpy as np

# Basic Numpy Setup
Some of the different ways of creating numpy arrays of different dimensions

In [123]:
def describe_array(title, array):
    """Print a titled summary of a numpy array.

    Parameters
    ----------
    title : str
        Heading printed before the array (callers include any trailing
        newline they want after the heading).
    array : numpy.ndarray
        The array whose contents, number of dimensions, shape and Python
        type are printed, followed by a blank separator line.
    """
    print(title)
    print(array)
    print("Dimensions:", array.ndim)
    print("Shape:", array.shape)
    print(type(array))
    print()


# Flat list -> 1-d array.
np_array1d = np.array([5, 10, 15, 20, 25, 30])
describe_array("1d numpy array\n", np_array1d)

# One nested list -> a single row.
np_array2d16 = np.array([[5, 10, 15, 20, 25, 30]])
describe_array("2d numpy array with single elements in the first dimension\n", np_array2d16)

# Six one-element lists -> a single column.
np_array2d61 = np.array([[5], [10], [15], [20], [25], [30]])
describe_array("2d numpy array with single elements in the second dimension\n", np_array2d61)

# Ragged input: the inner lists have different lengths, so numpy cannot build
# a rectangular 2-d array. dtype=object must be requested explicitly; without
# it NumPy >= 1.24 raises ValueError (earlier releases only warned). The result
# is a 1-d array whose six elements are Python lists.
np_array2different = np.array(
    [[5, 6], [10, 11], [15, 16], [20], [25], [30]], dtype=object
)
describe_array(
    "2d numpy array with different number of multiple elements in the second dimension\n",
    np_array2different,
)

# Six two-element lists -> a rectangular 2-d array.
np_array2d62 = np.array([[5, 6], [10, 11], [15, 16], [20, 21], [25, 26], [30, 31]])
describe_array("2d numpy array with multiple elements in the second dimension\n", np_array2d62)
   

1d numpy array

[ 5 10 15 20 25 30]
Dimensions: 1
Shape: (6,)
<class 'numpy.ndarray'>

2d numpy array with single elements in the first dimension

[[ 5 10 15 20 25 30]]
Dimensions: 2
Shape: (1, 6)
<class 'numpy.ndarray'>

2d numpy array with single elements in the second dimension

[[ 5]
 [10]
 [15]
 [20]
 [25]
 [30]]
Dimensions: 2
Shape: (6, 1)
<class 'numpy.ndarray'>

2d numpy array with different number of multiple elements in the second dimension

[[5, 6] [10, 11] [15, 16] [20] [25] [30]]
Dimensions: 1
Shape: (6,)
<class 'numpy.ndarray'>

2d numpy array with multiple elements in the second dimension

[[ 5  6]
 [10 11]
 [15 16]
 [20 21]
 [25 26]
 [30 31]]
Dimensions: 2
Shape: (6, 2)
<class 'numpy.ndarray'>



# Basic Pandas Setup

## Data Frames
Some of the different ways of manually creating pandas DataFrames

In [124]:
# Every entry below is handed to pd.DataFrame() in turn and the head of the
# resulting frame is printed. Order matters: test_df is rebound on each pass,
# so the last entry decides which frame test_df refers to after this cell.
frame_inputs = [
    # dictionary: the scalar 1 is paired with every element of the list
    {'Column1': 1, 'Column2': [5, 10, 15, 20, 25, 30]},
    # same data as above, but as a list of row lists (no column names)
    [[1, 5], [1, 10], [1, 15], [1, 20], [1, 25], [1, 30]],
    # dictionary whose column values come from the 1d numpy array
    {'Column1': 1, 'Column2': np_array1d},
    # 2d numpy array with a single element in the first dimension
    np_array2d16,
    # 2d numpy array with single elements in the second dimension
    np_array2d61,
    # numpy array built from inner lists of different lengths
    np_array2different,
    # 2d numpy array with two elements in the second dimension
    np_array2d62,
]

for frame_input in frame_inputs:
    test_df = pd.DataFrame(frame_input)
    print(test_df.head())
    print()

   Column1  Column2
0        1        5
1        1       10
2        1       15
3        1       20
4        1       25

   0   1
0  1   5
1  1  10
2  1  15
3  1  20
4  1  25

   Column1  Column2
0        1        5
1        1       10
2        1       15
3        1       20
4        1       25

   0   1   2   3   4   5
0  5  10  15  20  25  30

    0
0   5
1  10
2  15
3  20
4  25

          0
0    [5, 6]
1  [10, 11]
2  [15, 16]
3      [20]
4      [25]

    0   1
0   5   6
1  10  11
2  15  16
3  20  21
4  25  26



## Series
Some of the different ways of creating a pandas Series
* When creating a Series from a numpy array, the array must be 1-dimensional; otherwise pandas raises an error.

In [112]:
# Series built from a plain Python list.
list_values = [5, 10, 15, 20, 25, 30]
test_series = pd.Series(list_values)
print(test_series.head(), end="\n\n")

# Series built from the 1d numpy array; test_series keeps this value
# after the cell has run.
test_series = pd.Series(np_array1d)
print(test_series.head(), test_series.shape, sep="\n")

0     5
1    10
2    15
3    20
4    25
dtype: int64

0     5
1    10
2    15
3    20
4    25
dtype: int32
(6,)


# Conversion Pandas -> Numpy

DataFrame column selection: selecting with a list of labels (`df[[label]]`) returns a DataFrame, while selecting with a single label (`df[label]`) returns a Series.

In [113]:
# A bare label inside df[] picks one column; the type is printed so the
# kind of object returned is visible.
print("Select single column by single value (df[])")
newdf = test_df[1]
print(newdf, type(newdf), sep="\n")

print()

# A list of labels inside df[] is used here with a single label; again the
# type of the result is printed for comparison with the selection above.
print("Select single (or multiple) columns by list (df[[]])")
newdf = test_df[[1]]
print(newdf, type(newdf), sep="\n")


Select single column by single value (df[])
0     5
1    10
2    15
3    20
4    25
5    30
Name: 1, dtype: int64
<class 'pandas.core.series.Series'>

Select single (or multiple) columns by list (df[[]])
    1
0   5
1  10
2  15
3  20
4  25
5  30
<class 'pandas.core.frame.DataFrame'>


Dataframe to numpy conversions

In [127]:
# Show what .values returns for the full frame and for a one-column frame
# selected with a list of labels.
frames_to_convert = (
    ("Input Dataframe (2 columns):", test_df),
    ("Input Dataframe (1 column):", test_df[[1]]),
)

for heading, frame in frames_to_convert:
    print(heading)
    print(frame.head(10))
    print("DataFrame.values:")
    frame_values = frame.values
    print(type(frame_values), frame_values.shape)
    print(frame_values)
    print()

Input Dataframe (2 columns):
    0   1
0   5   6
1  10  11
2  15  16
3  20  21
4  25  26
5  30  31
DataFrame.values:
<class 'numpy.ndarray'> (6, 2)
[[ 5  6]
 [10 11]
 [15 16]
 [20 21]
 [25 26]
 [30 31]]

Input Dataframe (1 column):
    1
0   6
1  11
2  16
3  21
4  26
5  31
DataFrame.values:
<class 'numpy.ndarray'> (6, 1)
[[ 6]
 [11]
 [16]
 [21]
 [26]
 [31]]



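Series to numpy conversions. Note that this is not the same as converting a DataFrame with one column: `Series.values` is a 1d array of shape (6,), whereas the single-column DataFrame above gives a 2d array of shape (6, 1).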

In [126]:
# Read .values once and report its type, shape and contents.
series_values = test_series.values

print("Input Series:")
print(test_series.head(10))
print("Series.values:")
print(type(series_values), series_values.shape)
print(series_values)
print()

Input Series:
0     5
1    10
2    15
3    20
4    25
5    30
dtype: int32
Series.values:
<class 'numpy.ndarray'> (6,)
[ 5 10 15 20 25 30]



# Operations

Addition

In [121]:
# Plain numpy view of the series, reused by the first three additions.
series_values = test_series.values

# numpy + numpy
print("Adding series.values (1d np array) and 1d numpy array:")
sum_1d_1d = series_values + np_array1d
print(sum_1d_1d)

print("\nAdding series.values (1d np array) with 2d (1,x) array:")
sum_1d_row = series_values + np_array2d16
print(sum_1d_row)

print("\nAdding series.values (1d np array) with 2d (x,1) array:")
sum_1d_column = series_values + np_array2d61
print(sum_1d_column)

print("\nAdding two 2d (x,1) arrays):")
sum_column_column = np_array2d61 + np_array2d61
print(sum_column_column)

print("\nAdding two 2d (1,x) arrays):")
sum_row_row = np_array2d16 + np_array2d16
print(sum_row_row)

# pandas DataFrame + numpy
print("\nAdding single column DataFrame and 2d (x,1) arrays:")
result = test_df[[1]] + np_array2d61
print(result)
print(type(result))

# The next two additions are described, not executed: the printed text
# records the error that was reported when they were tried.
print("\nAdding single column DataFrame and 1d np arrays:")
print("test_df[[1]] + np_array1d - this doesn't work and gives a 'ValueError: Unable to coerce to Series, length must be 1: given 6' error. Use series.values instead")

print("\nAdding single column DataFrame and 2d (1,x) arrays:")
print("test_df[[1]] + np_array2d16 - this doesn't work and gives a 'ValueError: Unable to coerce to DataFrame, shape must be (6, 1): given (1, 6)' error. Use series.values instead")

# pandas Series + numpy
print("\nAdding series and 1d np arrays:")
series_plus_1d = test_series + np_array1d
print(series_plus_1d)

# Described, not executed (see the recorded error text).
print("\nAdding series and np arrays:")
print("test_series + np_array2d16 - this doesn't work and gives a 'Exception: Data must be 1-dimensional' error. Use series.values instead")


Adding series.values (1d np array) and 1d numpy array:
[10 20 30 40 50 60]

Adding series.values (1d np array) with 2d (1,x) array:
[[10 20 30 40 50 60]]

Adding series.values (1d np array) with 2d (x,1) array:
[[10 15 20 25 30 35]
 [15 20 25 30 35 40]
 [20 25 30 35 40 45]
 [25 30 35 40 45 50]
 [30 35 40 45 50 55]
 [35 40 45 50 55 60]]

Adding two 2d (x,1) arrays):
[[10]
 [20]
 [30]
 [40]
 [50]
 [60]]

Adding two 2d (1,x) arrays):
[[10 20 30 40 50 60]]

Adding single column DataFrame and 2d (x,1) arrays:
    1
0  10
1  20
2  30
3  40
4  50
5  60
<class 'pandas.core.frame.DataFrame'>

Adding single column DataFrame and 1d np arrays:
test_df[[1]] + np_array1d - this doesn't work and gives a 'ValueError: Unable to coerce to Series, length must be 1: given 6' error. Use series.values instead

Adding single column DataFrame and 2d (1,x) arrays:
test_df[[1]] + np_array2d16 - this doesn't work and gives a 'ValueError: Unable to coerce to DataFrame, shape must be (6, 1): given (1, 6)' error. U

## Numpy Array Manipulation
np.reshape - gives a new shape to an array without changing its data<br/>
np.ravel - returns a flattened 1d array (returns a view where possible and only copies the data when necessary)<br/>
ndarray.flatten - an array method (there is no top-level np.flatten function) that always returns a copy of the array collapsed into one dimension

np.zeros((3,3)).ravel()<br/>
array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

The advantage of ravel over flatten is that ravel only copies data when necessary and usually returns a view, whereas flatten always returns a copy of the data.

To use reshape to flatten the array:

tt = t.reshape(-1)