/
cost-splitter.py
203 lines (188 loc) · 7.38 KB
/
cost-splitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/python3
"""
Cost-Splitter
"""
from __future__ import print_function
import boto3
import csv
import operator
import datetime
import time
import os.path
import zipfile
import yaml
import smtplib
from email.mime.text import MIMEText
# Header of the CSV column whose value is compared against the configured
# linked account id to decide whether a row is relevant
LINKED_ACCOUNT_HEADING = "LinkedAccountId"
# Header of the CSV column whose value is read as the cost of a row
BLENDED_COST_HEADING = "BlendedCost"
def get_last_month():
    """
    Work out which calendar month comes before the current one
    return: that month as a string in the format YYYY-MM
    """
    one_day = datetime.timedelta(days=1)
    # Stepping back a single day from the 1st always lands in the
    # previous month, whatever its length
    start_of_this_month = datetime.date.today().replace(day=1)
    end_of_previous_month = start_of_this_month - one_day
    return end_of_previous_month.strftime("%Y-%m")
def unzip_file(folder, file_name, archive_path):
    """
    Unzips the downloaded file
    :param folder: the folder the archive is in (and is extracted into)
    :param file_name: the file name (without .zip)
    :param archive_path: the full archive path in form folder/file.csv.zip
    return: succeeded: bool - False when the archive is missing, is not a
            valid zip file or does not contain file_name
    """
    # Guard clause: nothing to do if the download never produced a file
    if not os.path.isfile(archive_path):
        print("Error: File {} not downloaded from bucket".format(archive_path))
        print("\tare you sure it is there?")
        return False

    print('Unzipping {}'.format(archive_path))
    try:
        # The context manager closes the archive even when extract() fails
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extract(file_name, folder)
    except (zipfile.BadZipFile, KeyError) as error:
        # BadZipFile: corrupt archive, KeyError: member not in the archive.
        # Report through the return value like the missing-file case above.
        print("Error: could not extract {} from {}: {}".format(
            file_name, archive_path, error))
        return False
    return True
def split_cost(debug, row, search_indices, cost_index, reports, cost_split):
    """
    Analyse the row, split it to different reports

    A row with a non-zero cost is added to the first report that has a tag
    contained (case-insensitively) in one of the searchable columns; when
    no report matches it is added to "Shared". Zero-cost rows are never
    attributed to a report; in debug mode they are printed and tallied
    under "Free".

    :param debug: bool to describe if debug is enabled
    :param row: the row to analyse
    :param search_indices: a list of ints that represent columns to search
    :param cost_index: int representing the blended cost column
    :param reports: A list of reports and their tags, each report being a
                    single-entry dict of {title: [tag, ...]}
    :param cost_split: the variable used to store the current cost result
                       (updated in place)
    return: cost_split: the variable used to store the current cost result
    """
    cost = float(row[cost_index])

    # Compare by value: an identity test ("is not") against a float literal
    # is effectively always true and would make the zero-cost branch dead.
    if cost != 0.0:
        # Lower-case the searchable cells once instead of once per tag
        searchable = [row[index].lower() for index in search_indices]
        for report in reports:
            title, tags = next(iter(report.items()))
            for tag in tags:
                # Cells are lower-cased, so the tag must be as well
                needle = str(tag).lower()
                if any(needle in cell for cell in searchable):
                    cost_split[title] += cost
                    return cost_split
        # If debug mode is on, print the shared items
        if debug:
            for index in search_indices:
                if row[index]:
                    print("Shared : {} - {} $ {}".format(index, row[index],
                                                         cost))
        cost_split["Shared"] += cost
    elif debug:
        for index in search_indices:
            if row[index]:
                print("Free: {} - {} $ {}".format(index, row[index], cost))
        # "Free" is only tracked in debug mode; tolerate a missing key
        cost_split["Free"] = cost_split.get("Free", 0.0) + cost
    return cost_split
def _tally_costs(config, rows, csv_headings, debug):
    """
    Add up the costs of the rows that belong to the configured account
    :param config: the loaded config file
    :param rows: iterable of CSV data rows (lists of strings)
    :param csv_headings: the CSV header row
    :param debug: bool to describe if debug is enabled
    return: tuple (cost_blended, cost_split, count, count_total)
    """
    # Get the indices we will need to extract the data
    linked_account_index = csv_headings.index(LINKED_ACCOUNT_HEADING)
    index_blended_cost = csv_headings.index(BLENDED_COST_HEADING)
    search_indices = [csv_headings.index(heading)
                      for heading in config['searchable_column']]
    # CSV cells are strings; YAML may have parsed the account id as an int
    wanted_account = str(config['linked_account_id'])

    cost_split = {}
    for report in config['reports']:
        cost_split[list(report.keys())[0]] = 0.0
    cost_split["Shared"] = 0.0
    if debug:
        cost_split["Free"] = 0.0

    count = 0
    count_total = 0
    cost_blended = 0.0
    # Rows are filtered by account id, so no sorting is needed: the matching
    # rows are visited in file order either way.
    for item in rows:
        count_total += 1
        if item[linked_account_index] == wanted_account:
            cost_blended += float(item[index_blended_cost])
            cost_split = split_cost(debug, item, search_indices,
                                    index_blended_cost, config["reports"],
                                    cost_split)
            count += 1
    return cost_blended, cost_split, count, count_total


def _build_report(month, cost_blended, cost_split, debug_lines):
    """
    Format the plain-text report
    :param month: the reported month as YYYY-MM
    :param cost_blended: the summed cost of all relevant rows
    :param cost_split: dict of report title -> summed cost
    :param debug_lines: extra lines appended before the closing separator
    return: the report as a single string
    """
    # NOTE(review): every figure is halved before being reported, exactly as
    # the original code did; presumably the CSV counts each charge twice -
    # confirm against the billing file format.
    total = cost_blended / 2
    parts = ['\n=============================\n',
             "For the month of {}\r\n".format(month),
             "Total Cost: {}\r\n".format(round(total, 2))]
    for title, amount in cost_split.items():
        cost = amount / 2
        # No relevant rows means a zero total; avoid dividing by it
        percent = (cost / total) * 100 if total else 0.0
        parts.append("{} cost is {}, {}% of the total\r\n".format(
            title, round(cost, 2), round(percent, 2)))
    parts.extend(debug_lines)
    parts.append('\n=============================\n')
    return ''.join(parts)


def _send_report_email(config, cost_split, message):
    """
    Mail the report to the configured recipients
    :param config: the loaded config file (email_* settings are read)
    :param cost_split: dict of report title -> cost, used for the subject
    :param message: the report text, sent as the mail body
    """
    # The report is the body of the mail, not a header
    email = MIMEText(message)
    email['From'] = config['email_from']
    email['To'] = ', '.join(config['email_to'])
    email['Subject'] = "AWS Billing Report" + ''.join(
        " - " + title for title in cost_split)
    # The context manager issues QUIT and closes the socket even on error
    with smtplib.SMTP(config['email_smtp']) as server:
        server.starttls()
        server.login(config['email_from'], config['email_password'])
        server.sendmail(config['email_from'], config['email_to'],
                        email.as_string())
    print('sent mail to ' + ', '.join(config['email_to']))


def generate_metrics(config):
    """
    Generate the metrics

    Downloads last month's zipped billing CSV from the configured S3
    bucket, unzips it, splits the costs of the configured linked account
    over the configured reports and then emails or prints the result.
    Nothing is reported when the archive could not be unzipped.

    :param config: the loaded config file; reads file_pattern, save_folder,
                   bucket_name, linked_account_id, searchable_column and
                   reports, the optional flags debug and email, and the
                   email_* settings when email is enabled
    """
    start_time = time.time()
    debug = config.get('debug', False)
    month = get_last_month()

    # Get the file from s3
    file_name = "{}-{}.{}".format(config['file_pattern'], month, "csv")
    file_path = "{}/{}".format(config['save_folder'], file_name)
    archive_name = "{}.{}".format(file_name, "zip")
    archive_path = "{}/{}".format(config['save_folder'], archive_name)
    s3_client = boto3.client('s3')
    print('Downloading: {}'.format(archive_name))
    s3_client.download_file(config['bucket_name'], archive_name, archive_path)

    if not unzip_file(config['save_folder'], file_name, archive_path):
        return

    print('Splitting costs')
    # newline='' lets the csv module handle line endings inside fields
    with open(file_path, newline='') as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        csv_headings = next(reader)
        cost_blended, cost_split, count, count_total = _tally_costs(
            config, reader, csv_headings, debug)

    # Create Report
    debug_lines = []
    if debug:
        debug_lines.append(
            "{} of {} records were relevant\r\n".format(count, count_total))
        debug_lines.append(
            "completed in %s seconds\r\n" % round(time.time() - start_time))
    message = _build_report(month, cost_blended, cost_split, debug_lines)

    # Email or print the results
    if config.get('email'):
        _send_report_email(config, cost_split, message)
    else:
        print(message)
def main():
    """
    Generate cost metrics

    Loads config.yml from the current working directory and runs
    generate_metrics when bucket_name, file_pattern and linked_account_id
    are all present and non-empty; otherwise prints an error.
    """
    required_keys = ('bucket_name', 'file_pattern', 'linked_account_id')
    with open("config.yml", 'r') as yaml_file:
        config = yaml.safe_load(yaml_file)
    # An empty file loads as None and a missing key must not raise KeyError:
    # both cases should reach the error message below.
    well_formed = isinstance(config, dict) and \
        all(config.get(key) for key in required_keys)
    if well_formed:
        generate_metrics(config)
    else:
        print("Error: config.yml is poorly formed")


if __name__ == "__main__":
    main()