# Basic pandas syntax

In [51]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import plotly_express as px

# for reproducibility - gets same set of values
np.random.seed(1337)

dice_series = pd.Series([np.random.randint(1, 6) for _ in range(10)])
dice_series.head()  # inspects the five first rows




0    5
1    1
2    2
3    3
4    3
dtype: int64

In [52]:
programs_dict = {"test1": 1,
                 "test2": 2,
                 "test3": 3}

programs_series = pd.Series(programs_dict)

df = pd.DataFrame(data=programs_series)
df

df = pd.DataFrame(data=programs_series, columns=("test",))
df


Unnamed: 0,test
test1,1
test2,2
test3,3


In [53]:
col1_dict = pd.Series(data={"t1": 1,
                            "t2": 2})

col2_dict = pd.Series(data={"t1": 1,
                            "t2": 2})

df_dict = {"colhead1": col1_dict,
           "colhead2": col2_dict}

df = pd.DataFrame(data=df_dict)
df

Unnamed: 0,colhead1,colhead2
t1,1,1
t2,2,2


In [54]:
series1 = pd.Series({"s1dk1": 1,
                     "s1dk2": 2,
                     "s1dk3": 3})

series2 = pd.Series({"s1dk1": 1,
                     "s1dk2": 2,
                     "s1dk3": 3})

df_dict = {"series1": series1, "series2": series2}

dataframe = pd.DataFrame(df_dict)
dataframe

Unnamed: 0,series1,series2
s1dk1,1,1
s1dk2,2,2
s1dk3,3,3


In [55]:
# Both do the same thing.

df_programs_over_29 = dataframe[dataframe["series1"] >= 2]
df_programs_over_29


series1 = pd.Series({"s1dk1": False,
                     "s1dk2": True,
                     "s1dk3": True})


df_programs_over_29 = dataframe[series1]
df_programs_over_29


Unnamed: 0,series1,series2
s1dk2,2,2
s1dk3,3,3


In [56]:
series1 = pd.Series({"s1dk1": 1,
                     "s1dk2": 2,
                     "s1dk3": 3})

series2 = pd.Series({"s1dk1": "11 hew",
                     "s1dk2": "11 hew",
                     "s1dk3": "11 hew"})

df_dict = {"series1": series1, "series2": series2}

dataframe = pd.DataFrame(df_dict)
dataframe



# converting Calories to int
dataframe["series2"] = dataframe["series2"].str[:-3].astype(int)
dataframe.sort_values(by="series1")



Unnamed: 0,series1,series2
s1dk1,1,11
s1dk2,2,11
s1dk3,3,11


In [57]:
dataframe.columns = ['new_col1', 'new_col2']
dataframe

Unnamed: 0,new_col1,new_col2
s1dk1,1,11
s1dk2,2,11
s1dk3,3,11


In [58]:
np.random.seed(1337)
size = 8

random_matrix = np.random.randint(1, 10, (size, size))

print(f"{random_matrix.size = }")


index = np.random.choice(random_matrix.size, 10, replace=False)
print(f"{index = }")


random_matrix.size = 64
index = array([28, 42, 17, 49, 53, 31, 35,  2, 13, 62])


In [59]:
matrix = np.matrix([[1, 2], [3, 4]])

df = pd.DataFrame(matrix, columns=["A", "B"])
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,2.0,2.0,1.414214,1.0,1.5,2.0,2.5,3.0
B,2.0,3.0,1.414214,2.0,2.5,3.0,3.5,4.0


# Merge, join, concat, append

In [60]:
df1 = pd.DataFrame(
    {
        'Fruits': ['Banana', 'Apple', 'Grape', 'Orange'],
        'Weight': ['100g', '122g', '50g', '18g'],
    }
)


df2 = pd.DataFrame(
    {
        'Fruits': ['Banana', 'Apple', 'Grape', 'Orange'],
        'Weight': ['100g', '122g', '50g', '18g'],
        #'a': ['100g', '122g', '50g', '18g']
    }
)

display(df1, df2, )

Unnamed: 0,Fruits,Weight
0,Banana,100g
1,Apple,122g
2,Grape,50g
3,Orange,18g


Unnamed: 0,Fruits,Weight
0,Banana,100g
1,Apple,122g
2,Grape,50g
3,Orange,18g


In [61]:
df3 = pd.merge(
    left=df1,
    right=df2,
    how='inner',
    #on=['Fruits'],
    #left_on="Fruitsz",
    #right_on="Fruits"
    indicator=True
    )

df3

Unnamed: 0,Fruits,Weight,_merge
0,Banana,100g,both
1,Apple,122g,both
2,Grape,50g,both
3,Orange,18g,both


In [62]:
df1 = pd.DataFrame(
    {
        'Fruits': ['Banana', 'Apple', 'Grape', 'Orange'],
        'Weight': ['100g', '122g', '50g', '18g'],
    }
)


df2 = pd.DataFrame(
    {
        'Fruitz': ['Banana', 'Apple', 'Grape', 'Orange'],
        'Weightz': ['100g', '122g', '50g', '18g'],
        #'a': ['100g', '122g', '50g', '18g']
    }
)

display(df1, df2)


Unnamed: 0,Fruits,Weight
0,Banana,100g
1,Apple,122g
2,Grape,50g
3,Orange,18g


Unnamed: 0,Fruitz,Weightz
0,Banana,100g
1,Apple,122g
2,Grape,50g
3,Orange,18g


In [63]:
df1.join(df2, how="inner")

Unnamed: 0,Fruits,Weight,Fruitz,Weightz
0,Banana,100g,Banana,100g
1,Apple,122g,Apple,122g
2,Grape,50g,Grape,50g
3,Orange,18g,Orange,18g


In [64]:
df1 = pd.DataFrame(
    {
        'Cities': ['Gothenborg', 'Gothenborg', 'Stockholm', 'Stockholm'],
        'Gender': ['Male', 'Female', 'Male', 'Female'],
    }
)


df2 = pd.DataFrame(
    {
        'Cities': ['Gothenborg', 'Stockholm'],
        'j2': ['100', '122'],
    }
)

display(df1, df2)


Unnamed: 0,Cities,Gender
0,Gothenborg,Male
1,Gothenborg,Female
2,Stockholm,Male
3,Stockholm,Female


Unnamed: 0,Cities,j2
0,Gothenborg,100
1,Stockholm,122


In [65]:
display(pd.concat([df1, df2], axis=1))
f = pd.merge(df1, df2, on='Cities')
f

Unnamed: 0,Cities,Gender,Cities.1,j2
0,Gothenborg,Male,Gothenborg,100.0
1,Gothenborg,Female,Stockholm,122.0
2,Stockholm,Male,,
3,Stockholm,Female,,


Unnamed: 0,Cities,Gender,j2
0,Gothenborg,Male,100
1,Gothenborg,Female,100
2,Stockholm,Male,122
3,Stockholm,Female,122


In [66]:
df1 = pd.DataFrame(
    {
        'Cities': [1, 1, 1, 1,],
        'Gender': ['Male', 'Female', 'Male', 'Female'],
    }
)
df1


Unnamed: 0,Cities,Gender
0,1,Male
1,1,Female
2,1,Male
3,1,Female


In [67]:
df1['f'] = df1['Cities'].astype(str).str.cat(df1['Gender'], sep='v')
df1


Unnamed: 0,Cities,Gender,f
0,1,Male,1vMale
1,1,Female,1vFemale
2,1,Male,1vMale
3,1,Female,1vFemale


In [68]:
display(df1)

f = df1.groupby('Gender')

f.get_group('Male')

Unnamed: 0,Cities,Gender,f
0,1,Male,1vMale
1,1,Female,1vFemale
2,1,Male,1vMale
3,1,Female,1vFemale


Unnamed: 0,Cities,Gender,f
0,1,Male,1vMale
2,1,Male,1vMale


In [69]:
df1
df1.loc[1, 'f']

'1vFemale'

In [70]:
series1 = pd.Series(list("abc"))

series2 = pd.Series(list("aaa"))

series1.str.cat(series2, sep="-")


df = pd.DataFrame(dict(d=series1, f=series2))
df


df.insert(0, 'g', df['d'].str.cat(df['f'], sep='je'))
df

Unnamed: 0,g,d,f
0,ajea,a,a
1,bjea,b,a
2,cjea,c,a


In [71]:
rename = {f'wide_variable_{i}': f'Dos{i}' for i in range(4)}
rename

{'wide_variable_0': 'Dos0',
 'wide_variable_1': 'Dos1',
 'wide_variable_2': 'Dos2',
 'wide_variable_3': 'Dos3'}

In [72]:
# Same can be done with a df as you're doing the same stuff you do with columns but with .index

series = pd.Series({'2021-01-01': 'a', '2022-02-02': 'b', '2023-03-03': 'c'})
series.index

series.index = pd.to_datetime(series.index)
series.index


DatetimeIndex(['2021-01-01', '2022-02-02', '2023-03-03'], dtype='datetime64[ns]', freq=None)

In [73]:
df = pd.DataFrame(series)

# Only works because the index (loc(row, col)) is a datetime object
df.loc['2021']
df.sort_index().loc['2021':'2022'] # In order to make the time series (index) slicable (other method is deprecated)

Unnamed: 0,0
2021-01-01,a
2022-02-02,b


In [74]:
f = [[1, 2],
     [3, 4]]

f = np.array(f)
f = f.flatten()

f[3]

4

In [75]:
f = dict(a = 1, b = 2, c = 3)

g = {key:val for key, val in f.items()}
g

{'a': 1, 'b': 2, 'c': 3}

### Dash

In [1]:
# RUN IN Testing.py

import pandas as pd
import plotly.express as px

from dash import Dash, html, dcc
from dash.dependencies import Output, Input
import dash_bootstrap_components as dbc


app = Dash(__name__)

app.layout = html.Div([
    html.H1('F')
])

if __name__ == '__main__':
    f = 1 / 0
    app.run_server(debug=True)


ZeroDivisionError: division by zero

In [None]:
# RUN IN Testing.py

from dash import Dash, dcc, html
import plotly.express as px
import pandas as pd

app = Dash(__name__)

colors = {
    'background': '#111111',
    'text': '#7FDBFF'
}

df = pd.DataFrame({
    "Fruit": ["Apples", "Oranges", "Bananas", "Apples", "Oranges", "Bananas"],
    "Amount": [4, 1, 2, 2, 4, 5],
    "City": ["SF", "SF", "SF", "Montreal", "Montreal", "Montreal"]
})

fig = px.bar(df, x="Fruit", y="Amount", color="City", barmode="group")

fig.update_layout(
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text']
)

app.layout = html.Div(style={'backgroundColor': colors['background']},
    children=[
        html.H1(
            children='Hello Dash',
            style={
                'textAlign': 'center',
                'color': colors['text']
            }
        ),

        html.Div(children='Dash: A web application framework for your data.', style={
            'textAlign': 'center',
            'color': colors['text']
        }),

        dcc.Graph(
            id='example-graph-2',
            figure=fig
        )
    ]
)

if __name__ == '__main__':
    f = 1 / 0
    app.run_server(debug=True)
