![image.png](attachment:image.png)

In [3]:
import pandas as pd
import numpy as np

In [4]:
my_dates_index = pd.date_range('20220112', periods=6)
my_dates_index

DatetimeIndex(['2022-01-12', '2022-01-13', '2022-01-14', '2022-01-15',
               '2022-01-16', '2022-01-17'],
              dtype='datetime64[ns]', freq='D')

##### sample NumPy data


In [5]:
sample_numpy_data = np.array(np.arange(24)).reshape((6,4))
sample_numpy_data

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

##### sample data frame, with column headers; uses our dates_index
documentation: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html

In [6]:
sample_df = pd.DataFrame(sample_numpy_data, index=my_dates_index, columns=[list('ABCD')])
sample_df

Unnamed: 0,A,B,C,D
2022-01-12,0,1,2,3
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11
2022-01-15,12,13,14,15
2022-01-16,16,17,18,19
2022-01-17,20,21,22,23


In [7]:
sample_df['2022-01-13':'2022-01-16']

Unnamed: 0,A,B,C,D
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11
2022-01-15,12,13,14,15
2022-01-16,16,17,18,19


##### data frame from a Python dictionary


In [8]:
df_from_dictionary = pd.DataFrame({ 
                         'float' : 1.,
                         'time' : pd.Timestamp('20241124'),
                         'seq' : np.arange(1,5),
                         'array' : np.array([3] * 4,dtype='int32'),
                         'categories' : ["test","train","taxes","tools"],
                         'dull' : 'boring data' 
                      })
df_from_dictionary

Unnamed: 0,float,time,seq,array,categories,dull
0,1.0,2024-11-24,1,3,test,boring data
1,1.0,2024-11-24,2,3,train,boring data
2,1.0,2024-11-24,3,3,taxes,boring data
3,1.0,2024-11-24,4,3,tools,boring data


In [9]:
df_from_dictionary = pd.DataFrame({ 
                         'float' : 1.,
                         'time' : pd.date_range('20220112', periods=4),
                         'seq' : np.arange(1,5),  
                         'array' : np.array([3] * 4,dtype='int32'),
                         'categories' : ["test","train","taxes","tools"],
                         'dull' : 'boring data' 
                      }, ['a', 'b', 'c', 'd'])
df_from_dictionary

Unnamed: 0,float,time,seq,array,categories,dull
a,1.0,2022-01-12,1,3,test,boring data
b,1.0,2022-01-13,2,3,train,boring data
c,1.0,2022-01-14,3,3,taxes,boring data
d,1.0,2022-01-15,4,3,tools,boring data


##### pandas retains data type for each column


In [10]:
df_from_dictionary.dtypes

float                float64
time          datetime64[ns]
seq                    int32
array                  int32
categories            object
dull                  object
dtype: object

In [12]:
df_from_dictionary.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, a to d
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   float       4 non-null      float64       
 1   time        4 non-null      datetime64[ns]
 2   seq         4 non-null      int32         
 3   array       4 non-null      int32         
 4   categories  4 non-null      object        
 5   dull        4 non-null      object        
dtypes: datetime64[ns](1), float64(1), int32(2), object(2)
memory usage: 192.0+ bytes


##### head and tail; default is 5 rows


In [13]:
sample_df.head(2)

Unnamed: 0,A,B,C,D
2022-01-12,0,1,2,3
2022-01-13,4,5,6,7


In [14]:
sample_df.tail()

Unnamed: 0,A,B,C,D
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11
2022-01-15,12,13,14,15
2022-01-16,16,17,18,19
2022-01-17,20,21,22,23


##### underlying data: values, index and columns


In [15]:
sample_df.values

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [16]:
type(sample_df.values)

numpy.ndarray

In [17]:
sample_df.index

DatetimeIndex(['2022-01-12', '2022-01-13', '2022-01-14', '2022-01-15',
               '2022-01-16', '2022-01-17'],
              dtype='datetime64[ns]', freq='D')

In [19]:
sample_df.columns

MultiIndex([('A',),
            ('B',),
            ('C',),
            ('D',)],
           )

##### transpose rows and columns

In [29]:
sample_df

Unnamed: 0,A,B,C,D
2022-01-12,0,1,2,3
2022-01-13,4,5,6,7
2022-01-14,8,9,10,11
2022-01-15,12,13,14,15
2022-01-16,16,17,18,19
2022-01-17,20,21,22,23


In [20]:
sample_df.T

Unnamed: 0,2022-01-12,2022-01-13,2022-01-14,2022-01-15,2022-01-16,2022-01-17
A,0,4,8,12,16,20
B,1,5,9,13,17,21
C,2,6,10,14,18,22
D,3,7,11,15,19,23
