In [1]:
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew, zscore

### What does df.melt do?

In [2]:
df = pd.DataFrame(
    {
        "Name": {0: "John", 1: "Bob", 2: "Shiela"},
        "Course": {0: "Masters", 1: "Graduate", 2: "Graduate"},
        "Age": {0: 27, 1: 23, 2: 21},
    }
)
example = pd.melt(df, id_vars=["Name", "Age"], value_vars=["Course"])
example

Unnamed: 0,Name,Age,variable,value
0,John,27,Course,Masters
1,Bob,23,Course,Graduate
2,Shiela,21,Course,Graduate


In [3]:
df = pd.DataFrame(
    {
        "Name": ["John", "Bob", "Shiela", "John", "Bob", "Shiela"],
        "Course": [
            "Masters",
            "Graduate",
            "Graduate",
            "Masters",
            "Graduate",
            "Graduate",
        ],
        "Age": [21, 27, 31, 21, 27, 31],
        "OlderAge": [41, 47, 49, 41, 49, 51],
    }
)
df

Unnamed: 0,Name,Course,Age,OlderAge
0,John,Masters,21,41
1,Bob,Graduate,27,47
2,Shiela,Graduate,31,49
3,John,Masters,21,41
4,Bob,Graduate,27,49
5,Shiela,Graduate,31,51


### What is the purpose of melt?
- The idea is that if you have two columns that are the same, you can add them together.

In [4]:
df_melt = df.melt(
    id_vars=["Course"],
    value_vars=["Age", "OlderAge"],
    value_name="NumericalAgeValue",
    var_name="AllAges",
)
df_melt

Unnamed: 0,Course,AllAges,NumericalAgeValue
0,Masters,Age,21
1,Graduate,Age,27
2,Graduate,Age,31
3,Masters,Age,21
4,Graduate,Age,27
5,Graduate,Age,31
6,Masters,OlderAge,41
7,Graduate,OlderAge,47
8,Graduate,OlderAge,49
9,Masters,OlderAge,41


In [36]:
data = [
    [5, 5, 5, 5, 5],  # Row 0
    [5, 9, 3, 8, 3],  # No Big Change
    [5, 9, 3, 9, 3],  # No Big Change
    [5, 4, 4, 7, 4],  # Change
]
z_scores = np.abs(zscore(data, axis=0))
z_scores

  z_scores = np.abs(zscore(data, axis=0))


array([[       nan, 0.76834982, 1.50755672, 1.52127766, 1.50755672],
       [       nan, 0.98787834, 0.90453403, 0.50709255, 0.90453403],
       [       nan, 0.98787834, 0.90453403, 1.18321596, 0.90453403],
       [       nan, 1.20740686, 0.30151134, 0.16903085, 0.30151134]])

In [6]:
threshold = 9
z_scores = [
    [0.1, 0.5, 15.0, 0.3],  # Row 0 (Value 15.0 is > 9)
    [0.2, 0.4, 0.5, 0.1],  # Row 1
    [0.2, 0.4, 0.5, 0.1],  # Row 2
]
outlier_rows = np.where(np.max(z_scores, axis=1) > threshold)[0]
outlier_rows

array([0])

In [67]:
data = [
    [2, 2, 3, 3, 3, 4, 7, 3, 3, 2],
    [5, 9, 9, 9, 4, 5, 9, 9, 9, 4],
    [2, 1, 4, 9, 4, 5, 4, 4, 9, 4],
]
df = pd.DataFrame(data)
mean_signals = kurtosis(df, axis=1)
mean_signals

array([ 2.60058309, -1.70109261, -0.35770361])

In [8]:
data = np.array(data)
e = data**2
f = np.sum(e, axis=1)
data

array([[5, 2, 2, 7, 2],
       [5, 3, 3, 7, 3],
       [5, 4, 4, 7, 4]])

In [9]:
e

array([[25,  4,  4, 49,  4],
       [25,  9,  9, 49,  9],
       [25, 16, 16, 49, 16]])

In [10]:
f

array([ 86, 101, 122])

In [11]:
data = [[1, 5, 3, 4, 5, 7], [9, 7, 3, 2, 1, 1], [2, 3, 4, 4, 3, 3]]
d = np.array(data)
lists = np.diff(d, axis=1)
lists

array([[ 4, -2,  1,  1,  2],
       [-2, -4, -1, -1,  0],
       [ 1,  1,  0, -1,  0]])

In [12]:
dx = np.mean(lists, axis=1)
dx

array([ 1.2, -1.6,  0.2])

In [13]:
signal = np.array([[3, 10, 9, 8, 4, 3, 9], [7, 10, 9, 8, 6, 7, 9]])
minus = signal.mean(axis=1, keepdims=True)
minus

array([[6.57142857],
       [8.        ]])

In [14]:
signal_centered = signal - minus
signal_centered

array([[-3.57142857,  3.42857143,  2.42857143,  1.42857143, -2.57142857,
        -3.57142857,  2.42857143],
       [-1.        ,  2.        ,  1.        ,  0.        , -2.        ,
        -1.        ,  1.        ]])

In [15]:
sign = np.sign(signal_centered)
sign

array([[-1.,  1.,  1.,  1., -1., -1.,  1.],
       [-1.,  1.,  1.,  0., -1., -1.,  1.]])

In [42]:
signals = np.array([[10, 12, 10, 8, 10], [100, 105, 100, 100, 105]])
minus = signals.var(axis=1)
minus

array([1.6, 6. ])

In [17]:
centered_signals = signals - signals.mean(axis=1, keepdims=True)
centered_signals

array([[ 0.,  2.,  0., -2.,  0.],
       [-1.,  4., -6., -1.,  4.]])

In [18]:
signed = np.sign(centered_signals)
signed

array([[ 0.,  1.,  0., -1.,  0.],
       [-1.,  1., -1., -1.,  1.]])

In [19]:
cross = signed >= 0
cross

array([[ True,  True,  True, False,  True],
       [False,  True, False, False,  True]])

In [20]:
difference_adjacent = np.diff(cross, axis=1)
difference_adjacent

array([[False, False,  True,  True],
       [ True,  True, False,  True]])

In [21]:
sums = np.sum(difference_adjacent, axis=1)
sums

array([2, 3])