# Head of code:
1. What type of encoding is utilized?
2. Import the necessary libraries.

In [1]:
import pandas as pd
from pathlib import Path

# Next block:
**-** Make the connection between the file sources.

In [2]:
# Location of the money_flows.csv source file, wrapped in a pathlib.Path.
# NOTE(review): this is an absolute path under one user's Desktop; it will
# presumably need adjusting before the notebook can run on another machine.
miss2_csv = Path(r"C:\Users\admin\Desktop\FinTech\FinTech2022_Module3_Financial Analysis_Pandas\01_Missing_Money\01_Missing_Money\Resources\money_flows.csv")

# Next block:
__-__ Make Pandas read the file.\
**-** Make Pandas create the dataframe.

In [3]:
# Load the CSV into a dataframe with the default integer index, then display it.
miss2_df = pd.read_csv(filepath_or_buffer=miss2_csv)
miss2_df

Unnamed: 0,Date,Total Payments
0,1/1/20,
1,1/2/20,1.04
2,1/3/20,1.65
3,1/3/20,1.65
4,1/3/20,1.65
...,...,...
363,12/26/20,210.13
364,12/27/20,211.08
365,12/28/20,213.27
366,12/29/20,217.28


# C.P.A.
### Fix dataframe aesthetics
**-** Fix the column index so that the information in the Date column can be utilized.

In [4]:
# Re-read the CSV with the "Date" column as the index and let pandas parse
# that index into datetimes (parse_dates=True applies to the index column).
# The `infer_datetime_format` argument is intentionally omitted: it is
# deprecated in pandas 2.x, where it only triggers a warning and has no effect.
miss2_df = pd.read_csv(miss2_csv, index_col="Date", parse_dates=True)
miss2_df

Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,
2020-01-02,1.04
2020-01-03,1.65
2020-01-03,1.65
2020-01-03,1.65
...,...
2020-12-26,210.13
2020-12-27,211.08
2020-12-28,213.27
2020-12-29,217.28


# C.P.A.
**-** There is a missing data/value.

__-__ Requirement:
>- Statistics about the missing data(s)\
>- Manipulate missing data/values\
- Determination: Materiality - Significant or Non-significant

In [5]:
# Boolean mask of the dataframe: True wherever a value is missing.
miss2_df.isna()


Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,True
2020-01-02,False
2020-01-03,False
2020-01-03,False
2020-01-03,False
...,...
2020-12-26,False
2020-12-27,False
2020-12-28,False
2020-12-29,False


In [6]:
# Count of missing values per column.
miss2_df.isna().sum()


Total Payments    10
dtype: int64

In [7]:
# Fraction of missing values per column (mean of the True/False mask).
miss2_df.isna().mean()

Total Payments    0.027174
dtype: float64

# Conclusion: Missing Values
### Proceed by:
- Missing values make up 2.7% of the Total Payments column: materially non-significant.\
- Proceed by: dropping or replacing ("Unknown", 0, mean)

In [13]:
# Display the original dataframe (missing values not yet handled) for reference.
miss2_df

Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,
2020-01-02,1.04
2020-01-03,1.65
2020-01-03,1.65
2020-01-03,1.65
...,...
2020-12-26,210.13
2020-12-27,211.08
2020-12-28,213.27
2020-12-29,217.28


In [8]:
# Build a separate copy of the data with every row that has a missing value removed;
# the original miss2_df is left untouched.
miss2_drp_df = miss2_df.dropna(axis=0, how="any")


# C.P.A.
### Dropping
##### Validation
- Confirm the dropped items by asking: are there any __NaN__ values present in the dataframe?

In [9]:
# Count of missing values per column after dropping.
miss2_drp_df.isna().sum()


Total Payments    0
dtype: int64

In [10]:
# Fraction of missing values per column after dropping.
miss2_drp_df.isna().mean()


Total Payments    0.0
dtype: float64

### Validated: 
#### There are no known **NaNs** present at this point.

# C.P.A.
### Replacing: "Unknown", 0, mean
##### Validation
- Confirming replacements are present by asking are there __"Unknown", 0, mean__ present in the dataframe?

In [12]:
# Display the original dataframe again as the starting point for the replacement variants.
miss2_df

Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,
2020-01-02,1.04
2020-01-03,1.65
2020-01-03,1.65
2020-01-03,1.65
...,...
2020-12-26,210.13
2020-12-27,211.08
2020-12-28,213.27
2020-12-29,217.28


# C.P.A.
### Replacing: "Unknown"
##### Validation
- Confirming replacements are present by asking are there "Unknown" replacements present in the dataframe?

In [16]:
# "Replace missing" variant 1 of 3: substitute the string "Unknown" for every missing value.
# (The other variants use 0 and the column mean.)
miss2_rlp_df1 = miss2_df.fillna(value="Unknown")

In [17]:
# Display the "Unknown" replacement version of the dataframe.
miss2_rlp_df1

Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,Unknown
2020-01-02,1.04
2020-01-03,1.65
2020-01-03,1.65
2020-01-03,1.65
...,...
2020-12-26,210.13
2020-12-27,211.08
2020-12-28,213.27
2020-12-29,217.28


# C.P.A.
## Validation
##### Replacing: "Unknown"

- Confirming replacements are present by asking are there __"Unknown"__ replacements in the dataframe?

# C.P.A.
### Replacing: Zero = 0
##### Validation
- Confirming replacements are present by asking are there 0 replacements present in the dataframe?

In [22]:
# "Replace missing" variant 2 of 3: substitute 0 for every missing value.
# (The other variants use "Unknown" and the column mean.)
miss2_rlp_df2 = miss2_df.fillna(value=0)

In [23]:
# Display the zero (0) replacement version of the dataframe.
miss2_rlp_df2

Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,0.00
2020-01-02,1.04
2020-01-03,1.65
2020-01-03,1.65
2020-01-03,1.65
...,...
2020-12-26,210.13
2020-12-27,211.08
2020-12-28,213.27
2020-12-29,217.28


# C.P.A.
## Validation
##### Replacing: 0

- Confirming replacements are present by asking are there 0 replacements in the dataframe? Do a checksum procedure.

In [None]:
# TODO: Checksum procedure (not yet implemented).

# C.P.A.
### Replacing: mean or average amount
##### Validation
- Confirming replacements are present by asking are there mean amounts present in the dataframe?

In [26]:
# "Replace missing" variant 3 of 3: substitute each column's mean for its missing values.
# (The other variants use "Unknown" and 0.)
miss2_rlp_df3 = miss2_df.fillna(value=miss2_df.mean())

In [27]:
# Display the mean replacement version of the dataframe.
miss2_rlp_df3

Unnamed: 0_level_0,Total Payments
Date,Unnamed: 1_level_1
2020-01-01,116.385866
2020-01-02,1.040000
2020-01-03,1.650000
2020-01-03,1.650000
2020-01-03,1.650000
...,...
2020-12-26,210.130000
2020-12-27,211.080000
2020-12-28,213.270000
2020-12-29,217.280000


# C.P.A.
## Validation
##### Replacing: mean or average amount

- Confirming replacements are present by asking are there mean amount replacements in the dataframe?

In [None]:
# TODO: Checksum procedure? (not yet implemented)