Skip to content

Commit 5e01c94

Browse files
committed
v5
1 parent 6c6af13 commit 5e01c94

File tree

8 files changed

+125
-12
lines changed

8 files changed

+125
-12
lines changed

dist/ds11mltoolkit-1.5.tar.gz

-21.3 KB
Binary file not shown.

dist/ds11mltoolkit-1.6.tar.gz

22.6 KB
Binary file not shown.

ds11mltoolkit.egg-info/PKG-INFO

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
Metadata-Version: 2.1
22
Name: ds11mltoolkit
3-
Version: 1.5
3+
Version: 1.6
44
Summary: Helper functions for all stages of the machine learning model building process
55
Home-page: https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit
6-
Download-URL: https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_5.tar.gz
6+
Download-URL: https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_6.tar.gz
77
Author: TheBridgeMachineLearningPythonLibrary
88
Author-email: seenstevol@protonmail.com
99
License: MIT

ds11mltoolkit.egg-info/requires.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ matplotlib
1515
seaborn
1616
plotly
1717
wordcloud
18+
folium

ds11mltoolkit/data_processing.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def list_categorical_columns(df):
3939

4040

4141

42+
4243
def uniq_value(list_values:list):
4344
'''
4445
Function returning the unique values from a list.
@@ -49,11 +50,13 @@ def uniq_value(list_values:list):
4950
----------
5051
unique: list of unique values
5152
'''
53+
5254
unique = []
5355
for i in list_values:
5456
if i not in unique:
55-
unique.extend(list_values)
56-
return unique
57+
unique.append(i)
58+
return unique
59+
5760

5861
def last_columndf(df,feature):
5962
'''
@@ -253,7 +256,7 @@ def load_imgs(path, im_size:int):
253256
filenames.append(file)
254257
if file [-4:] == '.jpg' or file [-4:] == '.png':
255258
# Read the image in color.
256-
image = imread(subdir + '\\' + file)
259+
image = imread(subdir + '/' + file)
257260
# Resize the image.
258261
smallimage = cv2.resize(image, (im_size, im_size))
259262
# Save the images in the X variable.
@@ -386,7 +389,7 @@ def gen_from_array(
386389
shuffle=True,
387390
sample_weight=None,
388391
seed=None,
389-
save_to_dir='./aug',
392+
save_to_dir=None,
390393
save_prefix='',
391394
save_format='png',
392395
ignore_class_split=False,

ds11mltoolkit/machine_learning.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
from sklearn.pipeline import make_pipeline
3232
from sklearn.preprocessing import scale
3333

34+
from keras.models import Sequential
35+
from keras.layers import LSTM, Dense
36+
3437

3538

3639
def balance_binary_target(df, strategy='smote', minority_ratio=None, visualize=False):
@@ -216,6 +219,7 @@ def load_model_zip(zip_file, model_file):
216219

217220
return model
218221

222+
219223
def image_scrap(url, n:int):
220224
'''
221225
Function to scrape chrome images and get the n images we want; it creates a new folder named 'my_images'.
@@ -280,7 +284,7 @@ def download_image(download_path, url, file_name):
280284
image_content = requests.get(url).content
281285
image_file = io.BytesIO(image_content)
282286
image = Image.open(image_file)
283-
file_path = download_path + file_name
287+
file_path = download_path + '/' + file_name
284288

285289
with open(file_path, "wb") as f:
286290
image.save(f, "JPEG")
@@ -729,4 +733,37 @@ def UnsupervisedDR(df, Acumulative_variance=0.85):
729733
reconstruccion,
730734
columns = df.columns,
731735
).set_index(df.index)
732-
return reconstruccion
736+
return reconstruccion
737+
738+
739+
def lstm_model(input_shape, lstm_units, dense_units, output_shape):
    """
    Build and compile a standard LSTM neural network.

    The network is a Keras ``Sequential`` stack of three layers: an LSTM
    layer, a dense layer created without an activation argument, and a dense
    output layer with "sigmoid" activation. Because of the sigmoid output and
    the binary cross-entropy loss, it is recommended for classification
    applications.

    Parameters
    ----------
    - input_shape: The input shape for the neural network. It is a tuple that
      specifies the shape of the input data (e.g., (timesteps, features)).
    - lstm_units: The number of units in the LSTM layer.
    - dense_units: The number of units in the intermediate dense layer.
    - output_shape: The number of units in the output layer, i.e. the number
      of output classes or values (each output lies between 0 and 1).

    Return
    ------
    - model: the compiled model (loss 'binary_crossentropy', optimizer 'adam',
      metric 'accuracy'). It is not trained.
    """
    # Describe the stack up front, in the order the layers are applied.
    layer_stack = [
        # Recurrent layer; it also declares the expected input shape.
        LSTM(units=lstm_units, input_shape=input_shape),
        # Intermediate dense layer (no activation argument is passed).
        Dense(units=dense_units),
        # Output layer squashed by a sigmoid.
        Dense(units=output_shape, activation='sigmoid'),
    ]

    # Assemble the sequential model layer by layer.
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    # Compile the model so it is ready for fitting.
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    return network

ds11mltoolkit/plot.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from wordcloud import STOPWORDS, WordCloud
1111
import plotly.express as px
1212
from sklearn.metrics import auc, roc_curve
13+
import folium
1314

1415

1516
def plot_multiclass_prediction_image(df, row_index: int, X_test: Union[pd.DataFrame, np.ndarray], prediction_col: str = 'Top Prediction', label_col: str = 'Label'):
@@ -234,4 +235,74 @@ def plot_roc_curve(y_true, y_pred, pos_label=1, figsize=(8, 8)):
234235
plt.ylabel('True Positive Rate')
235236
plt.title('Receiver operating characteristic (ROC) curve')
236237
plt.legend(loc="lower right")
237-
plt.show()
238+
plt.show()
239+
240+
241+
def plot_map(df, lat_col, lon_col, tooltip_col=None, zoom_start=3, map_type='OpenStreetMap'):
    """
    Function that creates an interactive map using folium from a dataframe with coordinates.

    The map is centred on the coordinates of the first row of the dataframe
    (first by position, regardless of the index labels), and one marker is
    added per row.

    Parameters
    ----------
    - df: dataframe with coordinates. It must contain at least one row.
    - lat_col: name of the column containing latitudes.
    - lon_col: name of the column containing the longitudes.
    - tooltip_col: (optional) name of the column containing the additional information to show in the tooltip of each marker.
    - zoom_start: (optional) initial zoom level of the map.
    - map_type: (optional) type of map to use, passed to folium as ``tiles``. Possible values: 'OpenStreetMap', 'Stamen Terrain', 'Stamen Toner', 'Stamen Watercolor', 'CartoDB positron', 'CartoDB dark_matter'.
      NOTE(review): which tile names are accepted depends on the installed folium version - confirm.

    Returns
    -------
    - map: folium Map object with the added markers

    Raises
    ------
    - ValueError: if the dataframe has no rows, since there is no point to centre the map on.
    """
    if df.empty:
        raise ValueError("plot_map requires a dataframe with at least one row")

    latitudes = df[lat_col]
    longitudes = df[lon_col]

    # Create the map with the indicated type and zoom level.
    # .iloc[0] selects the first row by position; indexing with [0] would look
    # up the index *label* 0 and fail on filtered or re-indexed dataframes.
    center = [latitudes.iloc[0], longitudes.iloc[0]]
    folium_map = folium.Map(location=center, zoom_start=zoom_start, tiles=map_type)

    # One tooltip per row, or None for every marker when no column was given.
    tooltips = df[tooltip_col] if tooltip_col else [None] * len(df)

    # Add markers for each point of the dataframe
    for lat, lon, tooltip in zip(latitudes, longitudes, tooltips):
        folium.Marker(location=[lat, lon], tooltip=tooltip).add_to(folium_map)

    # Return the map
    return folium_map
270+
271+
272+
def correl_map_max(dataframe, threshold=0.9):
    """
    Function that, given a dataframe, eliminates one variable from every pair whose
    absolute correlation is greater than the threshold, visualizes the correlations
    and returns a new dataframe without the eliminated columns.

    For each highly correlated pair, the variable whose column in the correlation
    matrix has the smaller standard deviation is the one eliminated. Only numeric
    (and boolean) columns take part in the correlation; any other column is left
    untouched in the returned dataframe. The heatmap shows the correlation matrix
    computed before any column is eliminated.

    Parameters
    ----------
    - dataframe: set of the data to which you want to apply.
    - threshold: (optional) absolute correlation above which a pair is considered
      highly correlated. Defaults to 0.9.

    Returns
    -------
    - dataframe: copy of the input without the eliminated columns.
    """

    # Calculate the correlation matrix.
    # Restricting to numeric/bool columns first keeps the function working on
    # dataframes that also hold text or date columns, where a plain .corr()
    # raises in recent pandas versions.
    corr_matrix = dataframe.select_dtypes(include=['number', 'bool']).corr()
    columns = corr_matrix.columns

    # Eliminate variables with correlation higher than the threshold.
    # Only the lower triangle (j < i) is visited, so each pair is checked once.
    high_corr_vars = set()
    for i, varname_i in enumerate(columns):
        for j in range(i):
            if abs(corr_matrix.iloc[i, j]) > threshold:
                varname_j = columns[j]
                if corr_matrix[varname_i].std() < corr_matrix[varname_j].std():
                    high_corr_vars.add(varname_i)
                else:
                    high_corr_vars.add(varname_j)
    dataframe = dataframe.drop(columns=list(high_corr_vars))

    # Generate the visualization of the correlation map.
    # Note: vmax=.3 makes the colour scale saturate at 0.3; the annotations
    # still show the actual correlation values.
    sns.set(style="white")
    f, ax = plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr_matrix, cmap=cmap, vmax=.3, center=0,
                square=True, annot=True, linewidths=.5, cbar_kws={"shrink": .5})
    plt.show()

    return dataframe

setup.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
setup(
44
name = 'ds11mltoolkit',
55
packages = ['ds11mltoolkit'],
6-
version = '1.5',
6+
version = '1.6',
77
license = 'MIT',
88
description = 'Helper functions for all stages of the machine learning model building process',
99
author = 'TheBridgeMachineLearningPythonLibrary',
1010
author_email = 'seenstevol@protonmail.com',
1111
url = 'https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit',
12-
download_url = 'https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_5.tar.gz',
12+
download_url = 'https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_6.tar.gz',
1313
keywords = ['machine learning', 'data visualization', 'data processing', 'sklearn', 'pandas'],
1414
install_requires=['pandas',
1515
'scipy',
@@ -27,7 +27,8 @@
2727
'matplotlib',
2828
'seaborn',
2929
'plotly',
30-
'wordcloud'],
30+
'wordcloud',
31+
'folium'],
3132
classifiers=[
3233
'Development Status :: 3 - Alpha',
3334
'Intended Audience :: Developers',

0 commit comments

Comments
 (0)