In [1]:
# IMPORTS AND STANDING DATA
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mtick

# Analysis date
analysis_date = datetime(2023, 3, 1)

# Locations of the standing-data CSVs.
# NOTE(review): these are absolute paths on one machine; anyone else running the
# notebook has to edit them before this cell will execute.
current_status_csv = r'/Users/leogallagher/Documents/FMA-Work/KRG/PFM/Code/Expenditure/srm-datapack/standing-data/standing-data-current-status.csv'
au_ministry_csv = r'/Users/leogallagher/Documents/FMA-Work/KRG/PFM/Code/Expenditure/srm-datapack/standing-data/standing-data-au-ministry.csv'

# Status standing data (joined to the raw data on 'Current State' further down)
current_status = pd.read_csv(current_status_csv)
# Standing data recording which AUs belong to which ministry
au_ministry_data = pd.read_csv(au_ministry_csv)

In [2]:
# Load the raw SRM extract. low_memory=False has pandas infer each column's dtype
# from the whole file instead of chunk by chunk.
raw_data_csv = r'/Users/leogallagher/Documents/FMA-Work/KRG/PFM/Code/Expenditure/raw-data/srm-raw-data/230301-srm-raw-data.csv'
df = pd.read_csv(raw_data_csv, low_memory=False)

In [3]:
# Strip the trailing ' $...' suffix from a raw 'Current State' value
def remove_extra_chars(string):
    """Return the text that precedes the first ' $' in ``string``.

    Parameters
    ----------
    string : object
        A cell value from the raw 'Current State' column. Normally a str, but
        blank CSV cells arrive as NaN (a float) and must not crash the cleanup.

    Returns
    -------
    object
        For a str: everything before the first ' $' (the whole string when the
        marker is absent). Any non-str value is returned unchanged so missing
        values stay missing.
    """
    if not isinstance(string, str):
        # NaN / None have no .split(); pass them through untouched
        return string
    # maxsplit=1: only the part before the first marker is needed
    return string.split(' $', 1)[0]

# Clean every 'Current State' value element-wise with remove_extra_chars
cleaned_current_state = df['Current State'].map(remove_extra_chars)
df['Current State'] = cleaned_current_state

In [4]:
# Columns to keep immediately, filtering out the many columns that are unneeded in the raw data
df_columns_to_keep = [
        'Srid','Date Submitted To Mofe','Approval Number Issued At','Ministry Input Id Issued At',
        'Ministry Id Issued At','Ministry Diwan Reviewer Action Date','Ministry Diwan Decision Maker Action Date',
        'Entity','Account Code','Exchange Rate','Total Cost Of Line','Amended Total Cost Of Line',
        'Savings Of Line','Total Sr Savings','Spending Team Action','Spending Team Action Date',
        'Price Evaluation Action','Price Evaluation Action Date','Engineering Directorate Action',
        'Engineering Directorate Action Date','Publications Directorate Action','Publications Directorate Action Date',
        'Director Of Spending Action','Director Of Spending Action Date','Dg Pa Action','Dg Pa Action Date',
        'Minister Of Finance Action','Minister Of Finance Action Date','Com Action','Com Action Date',
        'Second Round Ministry Diwan Reviewer Action Date','Second Round Ministry Diwan Decision Maker Action Date',
        'Second Round Spending Team Action Date','Second Round Director Of Spending Action Date',
        'Second Round Dg Pa Action Date','Second Round Minister Of Finance Action Date','Second Round Com Action Date',
        'Second Round Final Approval Date','Final Approval Date','Current State']

# Take an explicit copy: later cells add and overwrite columns on df_filtered, and
# assigning into a bare selection of `df` raises SettingWithCopyWarning and leaves
# it ambiguous whether `df` itself is being modified.
df_filtered = df.loc[:, df_columns_to_keep].copy()

In [5]:
# Create a 'Most Recent Action' column.
# columns_to_include lists every date column the most recent action could come from;
# the most recent action is the latest date found across these columns for the row.
columns_to_include = [
        'Date Submitted To Mofe','Approval Number Issued At','Ministry Input Id Issued At',
        'Ministry Id Issued At','Ministry Diwan Reviewer Action Date','Ministry Diwan Decision Maker Action Date',
        'Spending Team Action Date',
        'Price Evaluation Action Date',
        'Engineering Directorate Action Date','Publications Directorate Action Date',
        'Director Of Spending Action Date','Dg Pa Action Date',
        'Minister Of Finance Action Date','Com Action Date',
        'Second Round Ministry Diwan Reviewer Action Date','Second Round Ministry Diwan Decision Maker Action Date',
        'Second Round Spending Team Action Date','Second Round Director Of Spending Action Date',
        'Second Round Dg Pa Action Date','Second Round Minister Of Finance Action Date',
        'Second Round Com Action Date','Second Round Final Approval Date','Final Approval Date']


def _parse_action_dates(column):
    """Parse one raw date column into datetimes.

    Two layouts are accepted: 'YYYY-MM-DD hh:mm am/pm' and plain 'YYYY-MM-DD'.
    Anything else (including the '-' placeholder and missing values) becomes NaT.

    Parameters
    ----------
    column : pandas.Series
        Raw values of a single date column.

    Returns
    -------
    pandas.Series
        datetime64 values, NaT where neither layout matched.
    """
    with_time = pd.to_datetime(column, format='%Y-%m-%d %I:%M %p', errors='coerce')
    date_only = pd.to_datetime(column, format='%Y-%m-%d', errors='coerce')
    return with_time.fillna(date_only)


# Parse first, then take the row-wise maximum. Taking the max of the raw strings
# orders them lexicographically, which mis-ranks 12-hour times on the same day
# (e.g. '11:00 am' sorts after '10:29 pm') and lets '-' placeholders take part in
# the comparison. NaT entries are skipped by max(); a row with no valid date gets NaT.
df_filtered['Most Recent Action'] = (
    df_filtered[columns_to_include]
    .apply(_parse_action_dates)
    .max(axis=1)
)

In [6]:
# Merge standing data and raw data to get the status columns.
# Upper-case the join key on both sides first so the match ignores capitalisation.
for frame in (current_status, df_filtered):
    frame['Current State'] = frame['Current State'].str.upper()

# Left join: every raw line item is kept, with the standing-data columns attached
# wherever its 'Current State' appears in the standing data
df_filtered = df_filtered.merge(current_status, how='left', on='Current State')

In [7]:
# ADD STATUS COLUMN
# 'Minister Of Finance Action' values treated as an approval
_MOF_APPROVAL_ACTIONS = (
    "Approved By Minister Of Finance",
    "Amended and Approved By Minister Of Finance",
    "Pending Tender",
)
# 'Com Action' values treated as an approval
_COM_APPROVAL_ACTIONS = (
    "Approved By Council Of Ministers",
    "Amended And Approved By Council Of Ministers",
    "Pending Tender By Council Of Ministers",
)
# Standing-data 'Status' values that translate directly into a final label
_DIRECT_STATUS_LABELS = {
    'In process': 'In Process',
    'Not submitted': 'Not Submitted',
    'Revoked': 'Revoked',
    'Rejected': 'Rejected',
}


def final_status(row):
    """Classify one line item into its final status label.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'Current State' and 'Status'. For rows whose 'Current State'
        is "REVOKED" it must also provide 'Date Submitted To Mofe',
        'Final Approval Date', 'Minister Of Finance Action' and 'Com Action'.
        'Total Sr Savings' is read only when 'Status' is 'Approved'.

    Returns
    -------
    str or float
        One of "Not Submitted", "Revoked After Approval", "Revoked After Rejection",
        "In Process", "Revoked", "Rejected", "Approved", "Amended"; np.nan when
        the row matches none of the rules.

    Notes
    -----
    A revoked row that has a final approval date but no recognised Minister of
    Finance / Council of Ministers action is classified from 'Status' like any
    other row (previously such rows silently produced None).
    """
    if row['Current State'] == "REVOKED":
        # "-" is the raw data's placeholder for "no date recorded"
        if row['Date Submitted To Mofe'] == "-":
            return "Not Submitted"

        if row['Final Approval Date'] != "-":
            mof_action = row['Minister Of Finance Action']
            com_action = row['Com Action']

            # An approval at either level takes precedence over a rejection
            if mof_action in _MOF_APPROVAL_ACTIONS or com_action in _COM_APPROVAL_ACTIONS:
                return "Revoked After Approval"

            if (mof_action == "Rejected By Minister Of Finance"
                    or com_action == "Rejected By Council Of Ministers"):
                return "Revoked After Rejection"
            # No recognised decision: fall through to the Status-based rules

    status = row['Status']
    if status in _DIRECT_STATUS_LABELS:
        return _DIRECT_STATUS_LABELS[status]

    if status == 'Approved':
        # NOTE(review): assumes 'Total Sr Savings' is numeric here; a string such
        # as "0" or "-" would never equal 0 and be labelled 'Amended' - confirm.
        return 'Approved' if row['Total Sr Savings'] == 0 else 'Amended'

    return np.nan
    
# Label every line item: final_status is called once per row (axis=1)
final_status_by_row = df_filtered.apply(final_status, axis=1)
df_filtered['Final Status'] = final_status_by_row

In [12]:
# Amount columns: treat the '-' placeholder and missing values as zero, then
# convert the column to float
amount_columns = ['Savings Of Line', 'Amended Total Cost Of Line', 'Total Cost Of Line']
for amount_column in amount_columns:
    df_filtered[amount_column] = (
        df_filtered[amount_column]
        .replace('-', 0)
        .fillna(0)
        .astype(float)
    )

# Exchange rate: only missing values are replaced (with zero)
df_filtered['Exchange Rate'] = df_filtered['Exchange Rate'].fillna(0)

In [13]:
### ADD 'LINE ITEM SAVINGS IQD'
# Exchange rate x savings of the line, computed column-wise. This gives the same
# values as a per-row apply, without a Python call per row, and also works when
# the frame is empty.
df_filtered['Line Item Savings IQD'] = df_filtered['Exchange Rate'] * df_filtered['Savings Of Line']

In [14]:
### ADD 'Amended Line Item IQD'
# Exchange rate x amended total cost of the line, computed column-wise. This gives
# the same values as a per-row apply, without a Python call per row, and also
# works when the frame is empty.
df_filtered['Amended Line Item IQD'] = df_filtered['Exchange Rate'] * df_filtered['Amended Total Cost Of Line']

In [15]:
### Add 'Original Line Item IQD'
# Exchange rate x original total cost of the line, computed column-wise. This gives
# the same values as a per-row apply, without a Python call per row, and also
# works when the frame is empty.
df_filtered['Original Line Item IQD'] = df_filtered['Exchange Rate'] * df_filtered['Total Cost Of Line']


In [18]:
### ADD 'Original Total Cost of SR'

# Total of 'Original Line Item IQD' per 'Srid' (one entry per distinct Srid)
line_items_by_srid = df_filtered.groupby('Srid')
sum_original = line_items_by_srid['Original Line Item IQD'].sum()

# Broadcast each Srid's total back onto all of its line-item rows
df_filtered['Original Total Cost of SR'] = df_filtered['Srid'].map(sum_original)

In [19]:
# Preview the first five rows of the enriched frame
df_filtered.head(5)

Unnamed: 0,Srid,Date Submitted To Mofe,Approval Number Issued At,Ministry Input Id Issued At,Ministry Id Issued At,Ministry Diwan Reviewer Action Date,Ministry Diwan Decision Maker Action Date,Entity,Account Code,Exchange Rate,...,Current State,Most Recent Action,User Currently With,Status,Status (Updated),Final Status,Line Item Savings IQD,Amended Line Item IQD,Original Line Item IQD,Original Total Cost of SR
0,SRID-1,2020-07-20 10:29 pm,2020-10-15,2020-07-20,2020-07-20,-,2020-07-20 10:29 pm,وەزارەتی گواستنەوە و گەیاندن,2-01-02-09-02-00,1.0,...,OUTPUT ID ISSUED,2020-10-15,Ministry - Rejected,Rejected,Rejected - Inactive,Rejected,0.0,341000000.0,341000000.0,806000000.0
1,SRID-1,2020-07-20 10:29 pm,2020-10-15,2020-07-20,2020-07-20,-,2020-07-20 10:29 pm,وەزارەتی گواستنەوە و گەیاندن,2-01-04-07-00-00,1.0,...,OUTPUT ID ISSUED,2020-10-15,Ministry - Rejected,Rejected,Rejected - Inactive,Rejected,0.0,62000000.0,62000000.0,806000000.0
2,SRID-1,2020-07-20 10:29 pm,2020-10-15,2020-07-20,2020-07-20,-,2020-07-20 10:29 pm,وەزارەتی گواستنەوە و گەیاندن,2-01-03-08-02-00,1.0,...,OUTPUT ID ISSUED,2020-10-15,Ministry - Rejected,Rejected,Rejected - Inactive,Rejected,0.0,155000000.0,155000000.0,806000000.0
3,SRID-1,2020-07-20 10:29 pm,2020-10-15,2020-07-20,2020-07-20,-,2020-07-20 10:29 pm,وەزارەتی گواستنەوە و گەیاندن,2-01-03-08-02-00,1.0,...,OUTPUT ID ISSUED,2020-10-15,Ministry - Rejected,Rejected,Rejected - Inactive,Rejected,0.0,31000000.0,31000000.0,806000000.0
4,SRID-1,2020-07-20 10:29 pm,2020-10-15,2020-07-20,2020-07-20,-,2020-07-20 10:29 pm,وەزارەتی گواستنەوە و گەیاندن,2-01-03-08-02-00,1.0,...,OUTPUT ID ISSUED,2020-10-15,Ministry - Rejected,Rejected,Rejected - Inactive,Rejected,0.0,217000000.0,217000000.0,806000000.0
