<h1>Pandas Intro</h1>

In [2]:
import pandas as pd

<p>An Empty Series</p>

In [3]:
my_series = pd.Series()
print(my_series)

Series([], dtype: object)


In [4]:
import numpy as np

In [5]:
my_array = np.array([10, 20, 30, 40, 50])
my_series = pd.Series(my_array)

print(my_series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


<p>Custom Indices</p>

In [6]:
my_series = pd.Series(my_array, index=["num1", "num2", "num3", "num4", "num5"])
print(my_series)

num1    10
num2    20
num3    30
num4    40
num5    50
dtype: int64


<p>Passing Indices on series definition</p>

In [7]:
my_series = pd.Series([10, 20, 30, 40, 50], index=(["num1", "num2", "num3", "num4", "num5"]))
print(my_series)

num1    10
num2    20
num3    30
num4    40
num5    50
dtype: int64


<p>Defining A Series Using Scalar Values</p>

In [9]:
my_series=pd.Series(25, index=(["num1", "num2", "num3", "num4", "num5"]))
print(my_series)

num1    25
num2    25
num3    25
num4    25
num5    25
dtype: int64


<p>Defining a series using a dictionary</p>

In [10]:
my_dict = {
    "num1": 6,
    "num1": 7,
    "num1": 8
}

my_series = pd.Series(my_dict)
print(my_series)

num1    8
dtype: int64


<p>Finding the Min and Max values in a series</p>

In [11]:
import pandas as pd
my_series = pd.Series([1, 2, 3, 4, 5])

print("Series Min")
print(np.min(my_series))
print("Series Max")
print(np.max(my_series))

Series Min
1
Series Max
5


<p>The Mean in a series</p>

In [13]:
print(my_series.mean())
print(np.mean(my_series))

3.0
3.0


<p>Finding the Median of a series</p>

In [15]:
print(my_series.median())
print(np.median(my_series))

3.0
3.0


<p>Finding the data type of a series</p>

In [16]:
print(my_series.dtype)

int64


<p>Converting a pandas series to a list</p>

In [17]:
print(my_series.tolist())

[1, 2, 3, 4, 5]


<h2>Pandas DataFrame</h2>

<p>Empty DataFrame</p>

In [18]:
my_df = pd.DataFrame()
print(my_df)

Empty DataFrame
Columns: []
Index: []


<p>Creating a Dataframe using a 2D list</p>

In [19]:
import pandas as pd
scores = [
    ["Mathematics", 85],
    ["English", 91],
    ["History", 95]
]

my_df = pd.DataFrame(scores, columns=["Subject", "Score"])
my_df

Unnamed: 0,Subject,Score
0,Mathematics,85
1,English,91
2,History,95


<P>Create a Dataframe using dictionaries</P>

In [20]:
scores = {
    'Subject': ["Mathematics", "History", "English", "Science", "Arts"],
    'Score': [98, 75, 68, 82, 99]
}

my_df = pd.DataFrame(scores)

print(my_df)

       Subject  Score
0  Mathematics     98
1      History     75
2      English     68
3      Science     82
4         Arts     99


<p>Creating a DataFrame using a List of strings</p>

In [21]:
import pandas as pd
scores = [
    {'Subject': 'Mathematics', 'Score': 90},
    {'Subject': 'English', 'Score': 75},
    {'Subject': 'Geography', 'Score': 85},
    {'Subject': 'History', 'Score': 80},
    {'Subject': 'Programming', 'Score': 100},
]

my_df = pd.DataFrame(scores)
print(my_df)

       Subject  Score
0  Mathematics     90
1      English     75
2    Geography     85
3      History     80
4  Programming    100


<p>A dataframe using a list if dictionaries using null items</p>

In [23]:
scores = [
    {'Subject': 'Mathematics', 'Score': 85},
    {'Subject': 'English', 'Score': 75},
    {'Subject': 'Geography', 'Score': 93},
    {'Subject': 'Geography', 'Score': 95},
    {'Subject': 'History', 'Score': 80},
    {'Score': 72}
]

my_df = pd.DataFrame(scores)
print(my_df)

       Subject  Score
0  Mathematics     85
1      English     75
2    Geography     93
3    Geography     95
4      History     80
5          NaN     72


<p>Using DataFrames to view sample data at the top of the record</p>

In [25]:
my_df.head(2)

Unnamed: 0,Subject,Score
0,Mathematics,85
1,English,75


<p>Using Dataframes to view sample data from the bottom of the record</p>

In [26]:
my_df.tail(2)

Unnamed: 0,Subject,Score
4,History,80
5,,72


<p>Getting the summary of a Dataframe</p>

In [27]:
my_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Subject  5 non-null      object
 1   Score    6 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 224.0+ bytes


<p>Getting information on numeric columns for the dataframe</p>

In [28]:
my_df.describe()

Unnamed: 0,Score
count,6.0
mean,83.333333
std,9.395034
min,72.0
25%,76.25
50%,82.5
75%,91.0
max,95.0


<h2>Importing data in Pandas</h2>

<p>Importing CSV files</p>

In [31]:
some_data = pd.read_csv('./Whatsgoodly - Thought Catalog Influencers.csv')
some_data.head(10)

Unnamed: 0,Question,Segment Type,Segment Description,Answer,Count,Percentage
0,What do you think when an influencer is obviou...,Mobile,Global results,Is this product cool?,268,0.226
1,What do you think when an influencer is obviou...,Mobile,Global results,This is lame,532,0.449
2,What do you think when an influencer is obviou...,Mobile,Global results,Get that money!,293,0.247
3,What do you think when an influencer is obviou...,Mobile,Global results,Other (comment),91,0.077
4,What do you think when an influencer is obviou...,Web,Web,Is this product cool?,0,0.0
5,What do you think when an influencer is obviou...,Web,Web,This is lame,0,0.0
6,What do you think when an influencer is obviou...,Web,Web,Get that money!,0,0.0
7,What do you think when an influencer is obviou...,Web,Web,Other (comment),0,0.0
8,What do you think when an influencer is obviou...,Gender,Female voters,Is this product cool?,71,0.165
9,What do you think when an influencer is obviou...,Gender,Female voters,This is lame,220,0.51


In [32]:
some_data.describe()

Unnamed: 0,Count,Percentage
count,5460.0,5460.0
mean,36.229121,0.249451
std,91.652298,0.283424
min,0.0,0.0
25%,0.0,0.0
50%,4.0,0.158
75%,27.0,0.4
max,1479.0,1.0


<p>Importing Data from MySql</p>

In [29]:
import os

import pymysql
from dotenv import find_dotenv, load_dotenv
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

load_dotenv(find_dotenv())

MYSQL_DB = os.environ.get("MYSQL_DB")
MYSQL_USERNAME = os.environ.get("MYSQL_USERNAME")
MYSQL_PASSWORD = os.environ.get("MYSQL_PASSWORD")
MYSQL_URL = os.environ.get("MYSQL_URL")
MYSQL_PORT = os.environ.get("MYSQL_PORT")

engine = create_engine(mysql+pymysql:// + MYSQL_USERNAME + ':' + MYSQL_PASSWORD + '@' + MYSQL_URL + ':' + int(MYSQL_PORT) + '/' + MYSQL_DB)

employees_df = pd.read_sql_table('city', con=engine)

print(employees_df)

SyntaxError: invalid syntax (553624628.py, line 17)