v5

seenstevo · seenstevo · commit 5e01c94f8cb2 · 2023-02-23T21:59:32.000+01:00
diff --git a/dist/ds11mltoolkit-1.5.tar.gz b/dist/ds11mltoolkit-1.5.tar.gz
diff --git a/dist/ds11mltoolkit-1.6.tar.gz b/dist/ds11mltoolkit-1.6.tar.gz
diff --git a/ds11mltoolkit.egg-info/PKG-INFO b/ds11mltoolkit.egg-info/PKG-INFO
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: ds11mltoolkit
-Version: 1.5
+Version: 1.6
 Summary: Helper functions for all stages of the machine learning model building process
 Home-page: https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit
-Download-URL: https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_5.tar.gz
+Download-URL: https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_6.tar.gz
 Author: TheBridgeMachineLearningPythonLibrary
 Author-email: seenstevol@protonmail.com
 License: MIT
diff --git a/ds11mltoolkit.egg-info/requires.txt b/ds11mltoolkit.egg-info/requires.txt
@@ -15,3 +15,4 @@ matplotlib
 seaborn
 plotly
 wordcloud
+folium
diff --git a/ds11mltoolkit/data_processing.py b/ds11mltoolkit/data_processing.py
@@ -39,6 +39,7 @@ def list_categorical_columns(df):
 
 
 
+
 def uniq_value(list_values:list):
     '''
     Function returning the unique values from a list.
@@ -49,11 +50,13 @@ def uniq_value(list_values:list):
     ----------
     unique: list of unique values
     '''
+    # Values are kept in first-seen order; the return must sit after the loop, not inside it.
     unique = []
     for i in list_values:
         if i not in unique:
-            unique.extend(list_values)
-    return unique
+            unique.append(i)
+    return unique
+
 
 def last_columndf(df,feature):
     '''
@@ -253,7 +256,7 @@ def load_imgs(path, im_size:int):
                 filenames.append(file)
                 if file [-4:] == '.jpg' or file [-4:] == '.png':
                     # Read the image in color.
-                    image = imread(subdir + '\\' + file)
+                    image = imread(subdir + '/' + file)
                     # Resize the image.
                     smallimage = cv2.resize(image, (im_size, im_size)) 
                     # Save the images in the X variable.
@@ -386,7 +389,7 @@ def gen_from_array(
         shuffle=True,
         sample_weight=None,
         seed=None,
-        save_to_dir='./aug',
+        save_to_dir=None,
         save_prefix='',
         save_format='png',
         ignore_class_split=False,
diff --git a/ds11mltoolkit/machine_learning.py b/ds11mltoolkit/machine_learning.py
@@ -31,6 +31,9 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import scale
 
+from keras.models import Sequential
+from keras.layers import LSTM, Dense
+
 
 
 def balance_binary_target(df, strategy='smote', minority_ratio=None, visualize=False):
@@ -216,6 +219,7 @@ def load_model_zip(zip_file, model_file):
 
     return model
 
+
 def image_scrap(url, n:int):
 	'''
 	Function to scrap chrome images and get n images we want, and it create a new folder as 'my_images'.
@@ -280,7 +284,7 @@ def download_image(download_path, url, file_name):
 			image_content = requests.get(url).content
 			image_file = io.BytesIO(image_content)
 			image = Image.open(image_file)
-			file_path = download_path + file_name
+			file_path = download_path + '/' + file_name
 
 			with open(file_path, "wb") as f:
 				image.save(f, "JPEG")
@@ -729,4 +733,37 @@ def UnsupervisedDR(df, Acumulative_variance=0.85):
         reconstruccion,
         columns = df.columns,
     ).set_index(df.index)
-    return reconstruccion
+    return reconstruccion
+
+
+def lstm_model(input_shape, lstm_units, dense_units, output_shape):
+    """
+    Build and compile a small LSTM network for classification tasks.
+
+    The network stacks an LSTM layer, an intermediate dense layer and a
+    sigmoid-activated output layer, and is compiled with binary
+    cross-entropy loss, the Adam optimizer and accuracy as metric.
+
+    Parameters
+    ----------
+        - input_shape: tuple passed to the LSTM layer as its input shape (e.g., (timesteps, features)).
+        - lstm_units: number of units in the LSTM layer.
+        - dense_units: number of units in the intermediate dense layer.
+        - output_shape: number of units in the output layer.
+    Return
+    ------
+        - model: the compiled keras Sequential model.
+    """
+    # Layers are listed in the order in which data flows through them.
+    layers = [
+        LSTM(units=lstm_units, input_shape=input_shape),
+        # No activation is specified for this layer.
+        Dense(units=dense_units),
+        Dense(units=output_shape, activation='sigmoid'),
+    ]
+    network = Sequential(layers)
+
+    # Configure training: loss, optimizer and reported metric.
+    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+    return network
diff --git a/ds11mltoolkit/plot.py b/ds11mltoolkit/plot.py
@@ -10,6 +10,7 @@
 from wordcloud import STOPWORDS, WordCloud
 import plotly.express as px
 from sklearn.metrics import auc, roc_curve
+import folium
 
 
 def plot_multiclass_prediction_image(df, row_index: int, X_test: Union[pd.DataFrame, np.ndarray], prediction_col: str = 'Top Prediction', label_col: str = 'Label'):
@@ -234,4 +235,74 @@ def plot_roc_curve(y_true, y_pred, pos_label=1, figsize=(8, 8)):
     plt.ylabel('True Positive Rate')
     plt.title('Receiver operating characteristic (ROC) curve')
     plt.legend(loc="lower right")
-    plt.show()
+    plt.show()
+
+
+def plot_map(df, lat_col, lon_col, tooltip_col=None, zoom_start=3, map_type='OpenStreetMap'):
+    """
+    Function that creates an interactive map using folium from a dataframe with coordinates.
+    The map is centered on the first row of the dataframe, so df needs at least one row.
+    Parameters
+    ----------
+        - df: dataframe with coordinates.
+        - lat_col: name of the column containing latitudes.
+        - lon_col: name of the column containing the longitudes.
+        - tooltip_col: (optional) name of the column containing the additional information to show in the tooltip of each marker.
+        - zoom_start: (optional) initial zoom level of the map.
+        - map_type: (optional) type of map to use. Possible values: 'OpenStreetMap', 'Stamen Terrain', 'Stamen Toner', 'Stamen Watercolor', 'CartoDB positron', 'CartoDB dark_matter'.
+
+    Returns
+    -------
+        - map: folium Map object with one marker per dataframe row
+    """
+    # Center on the first row by position: .iloc[0] works for any index, whereas the
+    # label lookup [0] raises KeyError when no row is labelled 0 (e.g. after filtering).
+    center = [df[lat_col].iloc[0], df[lon_col].iloc[0]]
+    folium_map = folium.Map(location=center, zoom_start=zoom_start, tiles=map_type)
+
+    # Add one marker per row of the dataframe
+    for _, row in df.iterrows():
+        location = [row[lat_col], row[lon_col]]
+        tooltip = row[tooltip_col] if tooltip_col else None
+        folium.Marker(location=location, tooltip=tooltip).add_to(folium_map)
+
+    return folium_map
+
+
+def correl_map_max(dataframe):
+    """
+    Drop highly correlated columns and show the correlation heatmap.
+
+    For every pair of columns whose absolute correlation exceeds 0.9, the later
+    column is dropped when the standard deviation of its correlation-matrix column
+    is the smaller one; otherwise the earlier column is dropped. The heatmap shows
+    the correlation matrix computed before any column is dropped.
+
+    Parameters
+    ----------
+        - dataframe: set of the data to which you want to apply the filter.
+    Returns
+    -------
+        - dataframe: new dataframe without the dropped columns.
+    """
+    corr_matrix = dataframe.corr()
+    names = corr_matrix.columns
+
+    # Walk the lower triangle so each pair of columns is compared once.
+    to_drop = set()
+    for i, name_i in enumerate(names):
+        for j, name_j in enumerate(names[:i]):
+            if abs(corr_matrix.iloc[i, j]) > 0.9:
+                i_is_flatter = corr_matrix[name_i].std() < corr_matrix[name_j].std()
+                to_drop.add(name_i if i_is_flatter else name_j)
+    reduced = dataframe.drop(to_drop, axis=1)
+
+    # Draw the heatmap of the full correlation matrix.
+    sns.set(style="white")
+    plt.subplots(figsize=(11, 9))
+    palette = sns.diverging_palette(220, 10, as_cmap=True)
+    sns.heatmap(corr_matrix, cmap=palette, vmax=.3, center=0,
+                square=True, annot=True, linewidths=.5, cbar_kws={"shrink": .5})
+    plt.show()
+
+    return reduced
diff --git a/setup.py b/setup.py
@@ -3,13 +3,13 @@
 setup(
   name = 'ds11mltoolkit',
   packages = ['ds11mltoolkit'],
-  version = '1.5',
+  version = '1.6',
   license = 'MIT',
   description = 'Helper functions for all stages of the machine learning model building process',
   author = 'TheBridgeMachineLearningPythonLibrary',
   author_email = 'seenstevol@protonmail.com',
   url = 'https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit',
-  download_url = 'https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_5.tar.gz',
+  download_url = 'https://github.com/TheBridgeMachineLearningPythonLibrary/MachineLearningToolKit/archive/refs/tags/v_1_6.tar.gz',
   keywords = ['machine learning', 'data visualization', 'data processing', 'sklearn', 'pandas'],
   install_requires=['pandas',
                     'scipy',
@@ -27,7 +27,8 @@
                     'matplotlib',
                     'seaborn',
                     'plotly',
-                    'wordcloud'],
+                    'wordcloud',
+                    'folium'],
   classifiers=[
     'Development Status :: 3 - Alpha',
     'Intended Audience :: Developers',

-Original file line number
+Diff line change
 seaborn
 plotly
 wordcloud
 +folium