Learn the basics of the pandas Python library in this tutorial!

In [None]:
import pandas as pd
import numpy as np

## Series

In [None]:
# Build a Series from a plain Python list. No index is supplied, so pandas
# assigns the default integer labels 0..3.
mydata = [
    'Boat',
    'Car',
    'Bike',
    'Truck',
]
myseries1 = pd.Series(data=mydata)

In [None]:
# Show the Series of vehicle names built from `mydata`.
print(myseries1)

In [None]:
# Same construction with integer values.
# Note: this rebinds `mydata`; the earlier list of names is no longer
# reachable through that variable.
mydata = [
    1,
    55,
    99,
    43,
]
myseries2 = pd.Series(data=mydata)
print(myseries2)

## DataFrame

In [None]:
# Build a DataFrame from a list of row tuples; `columns` names the tuple
# positions in order.
mydfdata = [
    ('Boat', 1),
    ('Car', 55),
    ('Bike', 99),
    ('Truck', 43),
]
mydf = pd.DataFrame(data=mydfdata, columns=['thing', 'count'])

# Bare expression on the last line of the cell so the notebook renders it.
mydf

In [None]:
# List the dtype pandas inferred for each column of `mydf`.
mydf.dtypes

# Reading in Data

In [None]:
# Load the video-statistics CSV into the working DataFrame `df`.
# NOTE(review): the relative '../input/...' path looks like a Kaggle dataset
# mount — adjust it when running the notebook elsewhere.
df = pd.read_csv(
    filepath_or_buffer='../input/mr-beast-youtube-video-statistics/MrBeast_youtube_stats.csv',
)

# Inspect The Data

In [None]:
# Head / Tail
# With no argument, head() returns the first 5 rows.
df.head()

In [None]:
# With no argument, tail() returns the last 5 rows.
df.tail()

In [None]:
# List the dtype read_csv inferred for each column.
df.dtypes

In [None]:
# Summary statistics; by default describe() covers the numeric columns only.
df.describe()

# Columns and Rows

In [None]:
# Selecting a single column by name returns it as a Series.
df['viewCount']

In [None]:
# .loc selects by index *label*, not position. At this point the index is
# still the one read_csv created (set_index('id') runs in the next cell), so
# label 4 is presumably the fifth row — re-running this after set_index would
# look for an id equal to 4 instead.
df.loc[4]

In [None]:
# Promote the 'id' column to the row index. set_index returns a new frame,
# so the result is bound back to `df`.
df = df.set_index(keys='id')

# Subsetting Data

In [None]:
# (number of rows, number of columns) before any subsetting.
df.shape

In [None]:
# Subsetting Columns
# Indexing with a list of names keeps only those columns, in the order given.
df = df[[
    'title',
    'description',
    'publishTime',
    'duration_seconds',
    'viewCount',
    'likeCount',
    'commentCount',
]]

In [None]:
# Subsetting using loc
# Boolean mask: keep the rows whose viewCount is strictly above one million.
df_subset1 = df.loc[df['viewCount'].gt(1_000_000)]

In [None]:
# The same viewCount filter as the .loc version, expressed as a query string.
df_subset2 = df.query(expr='viewCount > 1000000')

In [None]:
# Keep only the rows that have a likeCount, so the integer cast further down
# cannot fail on NaN.
# The explicit .copy() makes `df` an independent frame: the later
# `df[col] = ...` assignments then write to it directly rather than to a
# filtered selection of the previous frame, which is what triggers pandas'
# SettingWithCopyWarning.
df = df.loc[df['likeCount'].notna()].copy()

# Casting dtypes

In [None]:
# Cast the count columns to integers. 'int64' is spelled out because the bare
# 'int' alias maps to the platform C long under NumPy 1.x (32-bit on Windows),
# which would make the resulting dtype machine-dependent.
# NOTE(review): only likeCount is filtered for NaN earlier in the notebook;
# this assumes viewCount has no missing values — confirm against the data.
df['viewCount'] = df['viewCount'].astype('int64')
df['likeCount'] = df['likeCount'].astype('int64')

In [None]:
# Convert publishTime to a pandas datetime dtype so it can be used for
# date-based operations.
df['publishTime'] = df['publishTime'].pipe(pd.to_datetime)

In [None]:
# Round-trip demo: render likeCount as strings, then let pd.to_numeric parse
# them back into a numeric dtype.
df['likeCount'] = df['likeCount'].astype('str').pipe(pd.to_numeric)

# Creating a new column

In [None]:
# Element-wise likes per view, stored as a new column.
df['like_to_view_ratio'] = df['likeCount'].div(df['viewCount'])

# Adding a new Row

In [None]:
# Take the last row (as a one-row DataFrame) to use as the row to append.
df_to_append = df.tail(n=1)

In [None]:
# Stack the extra row underneath the full frame. The original index labels
# are kept, so the appended row repeats the label of the last row of `df`.
df_concat = pd.concat(objs=[df, df_to_append])

# Plot Examples

In [None]:
# Import the pyplot interface directly. matplotlib.pylab is a legacy
# convenience module that also star-imports NumPy into its namespace, and the
# matplotlib documentation discourages its use.
import matplotlib.pyplot as plt
# Apply the ggplot look to every figure created from here on.
plt.style.use('ggplot')

In [None]:
# Histogram of view counts, 50 bins, on a wide 15x5 figure.
df['viewCount'].plot.hist(
    bins=50,
    title='Distribution of View Count',
    figsize=(15, 5),
)

In [None]:
# One point per video: views on the x-axis, likes on the y-axis.
df.plot.scatter(
    x='viewCount',
    y='likeCount',
    title='View vs Like Count',
)

In [None]:
# Display the rows with more than ten million likes.
df.query('likeCount > 10000000')

## Save our output

In [None]:
# Write the processed frame to the working directory. to_csv includes the
# index by default, so the index set earlier ('id') is written as the first
# column.
df.to_csv(path_or_buf='processed_data.csv')