## DataFrames

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Column-oriented definition of the sample forum-user table: every key becomes
# a DataFrame column, and position i across the values describes user i.
forum_users = {
    'UserID': np.arange(1, 6),
    'Username': ['jane_smith', 'alex123', 'bob56', 'mark_wilson', 'test_name'],
    # The trailing None leaves the last user's age missing (NaN in the frame).
    'Age': [18, 35, 25, 28, None],
    'Joined Date': pd.to_datetime(
        ['2034-01-01', '2034-02-15', '2034-04-25', '2034-06-21', '2034-09-15']
    ),
    'Total Posts': [150, 230, 80, 420, 310],
    'Reputation': [500, 720, 200, 940, 500],
}

# No index is passed, so rows are labelled 0..4 by default.
df = pd.DataFrame(data=forum_users)
df

Unnamed: 0,UserID,Username,Age,Joined Date,Total Posts,Reputation
0,1,jane_smith,18.0,2034-01-01,150,500
1,2,alex123,35.0,2034-02-15,230,720
2,3,bob56,25.0,2034-04-25,80,200
3,4,mark_wilson,28.0,2034-06-21,420,940
4,5,test_name,,2034-09-15,310,500


In [4]:
# Confirm that the object built from the dict is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [5]:
# Column labels are exposed as a pandas Index object.
df.columns

Index(['UserID', 'Username', 'Age', 'Joined Date', 'Total Posts',
       'Reputation'],
      dtype='object')

In [6]:
# Convert the column Index into a plain Python list of label strings.
df.columns.tolist()

['UserID', 'Username', 'Age', 'Joined Date', 'Total Posts', 'Reputation']

In [7]:
# Row labels; no index was supplied at construction, so this is a default RangeIndex.
df.index

RangeIndex(start=0, stop=5, step=1)

In [8]:
# Materialize the RangeIndex as a list of row labels.
df.index.tolist()

[0, 1, 2, 3, 4]

In [9]:
# Per-column dtypes. Age is float64 because its missing entry is stored as NaN.
df.dtypes

UserID                  int64
Username               object
Age                   float64
Joined Date    datetime64[ns]
Total Posts             int64
Reputation              int64
dtype: object

In [10]:
# Export the frame as a NumPy array. to_numpy() is the accessor pandas
# recommends over the legacy .values attribute; the result is the same.
# Because the columns have mixed dtypes, the array falls back to dtype=object.
df.to_numpy()

array([[1, 'jane_smith', 18.0, Timestamp('2034-01-01 00:00:00'), 150,
        500],
       [2, 'alex123', 35.0, Timestamp('2034-02-15 00:00:00'), 230, 720],
       [3, 'bob56', 25.0, Timestamp('2034-04-25 00:00:00'), 80, 200],
       [4, 'mark_wilson', 28.0, Timestamp('2034-06-21 00:00:00'), 420,
        940],
       [5, 'test_name', nan, Timestamp('2034-09-15 00:00:00'), 310, 500]],
      dtype=object)

In [11]:
# The exported data is a plain numpy.ndarray (to_numpy() is preferred over .values).
type(df.to_numpy())

numpy.ndarray

In [12]:
# Preview the first three rows.
df.head(3)

Unnamed: 0,UserID,Username,Age,Joined Date,Total Posts,Reputation
0,1,jane_smith,18.0,2034-01-01,150,500
1,2,alex123,35.0,2034-02-15,230,720
2,3,bob56,25.0,2034-04-25,80,200


In [13]:
# NumPy-style positional lookup on the exported array: row 1, column 1 is the
# second user's Username. to_numpy() replaces the legacy .values accessor.
df.to_numpy()[1, 1]

'alex123'

In [14]:
# Preview the last two rows.
df.tail(2)

Unnamed: 0,UserID,Username,Age,Joined Date,Total Posts,Reputation
3,4,mark_wilson,28.0,2034-06-21,420,940
4,5,test_name,,2034-09-15,310,500


In [15]:
# Summary statistics rounded to two decimals. The string column Username is
# absent from the result, and Age has count 4 because one value is missing.
df.describe().round(2)

Unnamed: 0,UserID,Age,Joined Date,Total Posts,Reputation
count,5.0,4.0,5,5.0,5.0
mean,3.0,26.5,2034-04-28 09:36:00,238.0,572.0
min,1.0,18.0,2034-01-01 00:00:00,80.0,200.0
25%,2.0,23.25,2034-02-15 00:00:00,150.0,500.0
50%,3.0,26.5,2034-04-25 00:00:00,230.0,500.0
75%,4.0,29.75,2034-06-21 00:00:00,310.0,720.0
max,5.0,35.0,2034-09-15 00:00:00,420.0,940.0
std,1.58,7.05,,133.3,276.62


In [16]:
# Keep only object-dtype columns (here just Username).
df.select_dtypes(include='object')

Unnamed: 0,Username
0,jane_smith
1,alex123
2,bob56
3,mark_wilson
4,test_name


In [17]:
# Drop object-dtype columns, leaving the numeric and datetime columns.
df.select_dtypes(exclude='object')

Unnamed: 0,UserID,Age,Joined Date,Total Posts,Reputation
0,1,18.0,2034-01-01,150,500
1,2,35.0,2034-02-15,230,720
2,3,25.0,2034-04-25,80,200
3,4,28.0,2034-06-21,420,940
4,5,,2034-09-15,310,500


In [18]:
# Exclude several dtypes at once by passing a list; only Age (float64) and
# Joined Date (datetime64) remain.
df.select_dtypes(exclude=['object', 'int64'])

Unnamed: 0,Age,Joined Date
0,18.0,2034-01-01
1,35.0,2034-02-15
2,25.0,2034-04-25
3,28.0,2034-06-21
4,,2034-09-15


In [19]:
# Names of the object-dtype columns, returned as an Index.
df.select_dtypes(include='object').columns

Index(['Username'], dtype='object')

In [20]:
# Keep only the int64 columns.
df.select_dtypes(include='int64')

Unnamed: 0,UserID,Total Posts,Reputation
0,1,150,500
1,2,230,720
2,3,80,200
3,4,420,940
4,5,310,500


In [21]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isna()

Unnamed: 0,UserID,Username,Age,Joined Date,Total Posts,Reputation
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,True,False,False,False


In [22]:
# Count missing values per column (each True contributes 1 to the sum).
df.isna().sum()

UserID         0
Username       0
Age            1
Joined Date    0
Total Posts    0
Reputation     0
dtype: int64

In [23]:
# Summing the mask column-wise yields a Series (one entry per column).
type(df.isna().sum())

pandas.core.series.Series

## Series

In [24]:
# Select a single column by its label.
df['Username']

0     jane_smith
1        alex123
2          bob56
3    mark_wilson
4      test_name
Name: Username, dtype: object

In [25]:
# A single selected column is a pandas Series.
type(df['Username'])

pandas.core.series.Series

In [26]:
# Keep the Username column under its own name so the next cells can inspect it.
username_series = df['Username']
username_series

0     jane_smith
1        alex123
2          bob56
3    mark_wilson
4      test_name
Name: Username, dtype: object

In [27]:
# Underlying data of the Series as a NumPy array. to_numpy() is the accessor
# pandas recommends over the legacy .values attribute; the result is the same.
username_series.to_numpy()

array(['jane_smith', 'alex123', 'bob56', 'mark_wilson', 'test_name'],
      dtype=object)

In [28]:
# The exported Series data is a plain numpy.ndarray (to_numpy() is preferred over .values).
type(username_series.to_numpy())

numpy.ndarray

In [29]:
# The Series carries the same RangeIndex as the DataFrame it was selected from.
username_series.index

RangeIndex(start=0, stop=5, step=1)

In [30]:
# value_counts() is indexed by the distinct values themselves, so .index lists
# the unique usernames.
df['Username'].value_counts().index

Index(['jane_smith', 'alex123', 'bob56', 'mark_wilson', 'test_name'], dtype='object', name='Username')

In [31]:
# Frequency of each Reputation value, most frequent first by default.
df['Reputation'].value_counts()

Reputation
500    2
720    1
200    1
940    1
Name: count, dtype: int64

In [32]:
# Same counts, ordered from least to most frequent.
df['Reputation'].value_counts(ascending=True)

Reputation
720    1
200    1
940    1
500    2
Name: count, dtype: int64

In [33]:
# Distinct Reputation values in order of first appearance, as a NumPy array.
df['Reputation'].unique()

array([500, 720, 200, 940])

In [36]:
# Sort usernames alphabetically; the original row labels travel with the values.
df['Username'].sort_values()

1        alex123
2          bob56
0     jane_smith
3    mark_wilson
4      test_name
Name: Username, dtype: object

In [37]:
# Reverse alphabetical order.
df['Username'].sort_values(ascending=False)

4      test_name
3    mark_wilson
0     jane_smith
2          bob56
1        alex123
Name: Username, dtype: object