-
Notifications
You must be signed in to change notification settings - Fork 1
/
machine_learning_manager.py
90 lines (73 loc) · 4.08 KB
/
machine_learning_manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# user-defined imports
from my_package.log_manager import LogManager
from my_package.database_manager import DatabaseManager
from my_package import statistics as statistics
from my_package import visualizer as visualizer
from my_package.machine_learning_components import LinearRegression
from my_package.data_processor import DataProcessor
from my_package.analysis_info import AnalysisInfo, DataInfo, ResultsInfo
from my_package.data_cleaner import DataCleaner
class MachineLearningManager:
    """Drive the linear-regression analyses over the data held by the database manager.

    Every operation is a static method; instances carry no state.
    """

    def __init__(self):
        """Create a manager instance; there is nothing to initialise."""

    @staticmethod
    def get_data(database_manager, data_map_analysis):
        """Load the stored data and project it onto the analysis columns.

        Args:
            database_manager: Object exposing ``load()``; the result is used
                as a DataFrame-like object (presumably a pandas DataFrame --
                it is checked via ``.empty`` and indexed by column labels).
            data_map_analysis: Mapping whose keys are the column labels to keep.

        Returns:
            A ``(df_reduced, df)`` tuple: the column subset and the full
            loaded frame. ``(None, None)`` when ``load()`` returns ``None``
            or an empty frame.
        """
        df = database_manager.load()
        # A missing frame and an empty frame are reported identically.
        if df is None or df.empty:
            print('Database is empty!')
            return None, None

        # Pass an explicit list of labels instead of a dict_keys view so the
        # column selection does not rely on how the frame treats view objects.
        df_reduced = df[list(data_map_analysis)]
        return df_reduced, df

    @staticmethod
    def do_analysis(config):
        """Run the complete analysis workflow.

        Loads the data through ``DatabaseManager``, cleans it with
        ``DataProcessor.data_cleanup``, plots missing data, prepares the
        multiple-linear data, builds the six single-predictor analyses and
        then fits and visualises each of them.

        Args:
            config: Configuration object; ``silent_mode_enabled`` is read here
                and the object is forwarded to several ``DataProcessor`` calls.

        Returns:
            None. Returns early (after printing a message) when no data could
            be loaded.
        """
        logger = LogManager.instance()
        # NOTE(review): this message looks copied from a page-parsing module;
        # confirm the intended wording before changing the runtime text.
        logger.log("Parsing city page text!", logger.Logging_Levels["DEBUG"])

        data_map_analysis = {"city": "city", "visitor": "visitor",
                             "population": "population", "museum": "museum",
                             "city_visitor": "city_visitor"}
        database_manager = DatabaseManager.instance()
        loaded_data, original_data = MachineLearningManager.get_data(
            database_manager, data_map_analysis)
        if loaded_data is None:
            print('Fail to load the data from database!')
            return

        cleaned_df = DataProcessor.data_cleanup(loaded_data)
        cleaned_original_data = DataProcessor.data_cleanup(original_data)
        cleaned_original_data_new = DataCleaner.empty_to_none(cleaned_original_data)
        visualizer.missingdata_plot(cleaned_original_data_new, config.silent_mode_enabled)

        # The multiple-linear step works on the full cleaned frame and needs
        # the extra "established" and "type" columns.
        data_map = {"city": "city",
                    "visitor": "visitor",
                    "population": "population",
                    "city_visitor": "city_visitor",
                    "established": "established",
                    "type": "type",
                    "museum": "museum"}
        DataProcessor.multiple_linear_data(cleaned_original_data, data_map, config)

        analysis_list = MachineLearningManager._build_analysis_list(cleaned_df, config)
        MachineLearningManager._run_and_report(
            analysis_list, config.silent_mode_enabled, logger)

    @staticmethod
    def _build_analysis_list(cleaned_df, config):
        """Build the six AnalysisInfo inputs: All/Max/Sum for each of the two column maps."""
        analysis_list = []

        # Population-based analyses.
        data_map = {"city": "city", "visitor": "visitor",
                    "population": "population", "museum": "museum"}
        data_info = DataProcessor.population_visitors(cleaned_df, data_map, config)
        analysis_list.append(AnalysisInfo(data_info=data_info, type="All"))
        data_info = DataProcessor.population_visitors_max(cleaned_df, data_map)
        analysis_list.append(AnalysisInfo(data_info=data_info, type="Max"))
        data_info = DataProcessor.population_visitors_sum(cleaned_df, data_map)
        analysis_list.append(AnalysisInfo(data_info=data_info, type="Sum"))

        # City-visitor-based analyses.
        data_map = {"city": "city", "visitor": "visitor",
                    "city_visitor": "city_visitor", "museum": "museum"}
        data_info = DataProcessor.city_visitor_museum_visitors(cleaned_df, data_map, config)
        analysis_list.append(AnalysisInfo(data_info=data_info, type="All"))
        data_info = DataProcessor.city_visitor_museum_visitors_max(cleaned_df, data_map)
        analysis_list.append(AnalysisInfo(data_info=data_info, type="Max"))
        data_info = DataProcessor.city_visitor_museum_visitors_sum(cleaned_df, data_map)
        analysis_list.append(AnalysisInfo(data_info=data_info, type="Sum"))

        return analysis_list

    @staticmethod
    def _run_and_report(analysis_list, silent_mode_enabled, logger):
        """Fit every analysis, plot it, then emit the combined plots and tables."""
        for analysis in analysis_list:
            LinearRegression.perform_analysis(analysis)
            visualizer.plot_results(analysis, silent_mode_enabled)
            visualizer.scatter_plot_results(analysis, silent_mode_enabled)
            visualizer.residual_plot(analysis, silent_mode_enabled)
            # Per-analysis result dump only when debug logging is enabled.
            if logger.debug_enabled():
                visualizer.print_result(analysis)

        visualizer.scatter_plots(analysis_list, "Linear Regression analysis results", silent_mode_enabled)
        visualizer.print_regression_results(analysis_list, True)
        visualizer.print_smart_table(analysis_list, " Linear Regression analysis results")