In [4]:
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Sample DataFrame
df = pd.DataFrame({
    'Color': ['Red', 'Blue', 'Green'],
    'Size': ['S', 'M', 'L'],
    'Price': [10, 20, 15]
})

categorical_columns = ['Color', 'Size']

# One-hot encoding using sklearn
encoder = OneHotEncoder(sparse_output=False)
one_hot_encoded = encoder.fit_transform(df[categorical_columns])

one_hot_df = pd.DataFrame(one_hot_encoded, 
                          columns=encoder.get_feature_names_out(categorical_columns))

df_sklearn_encoded = pd.concat([df.drop(categorical_columns, axis=1), one_hot_df], axis=1)

print(df_sklearn_encoded)


   Price  Color_Blue  Color_Green  Color_Red  Size_L  Size_M  Size_S
0     10         0.0          0.0        1.0     0.0     0.0     1.0
1     20         1.0          0.0        0.0     0.0     1.0     0.0
2     15         0.0          1.0        0.0     1.0     0.0     0.0


In [5]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

data = {'Employee id': [10, 20, 15, 25, 30],
        'Gender': ['M', 'F', 'F', 'M', 'F'],
        'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Nice'],
        }
df = pd.DataFrame(data)
print(f"Employee data : \n{df}")

categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
encoder = OneHotEncoder(sparse_output=False)

one_hot_encoded = encoder.fit_transform(df[categorical_columns])

one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(categorical_columns))

df_encoded = pd.concat([df, one_hot_df], axis=1)

df_encoded = df_encoded.drop(categorical_columns, axis=1)
print(f"Encoded Employee data : \n{df_encoded}")

Employee data : 
   Employee id Gender Remarks
0           10      M    Good
1           20      F    Nice
2           15      F    Good
3           25      M   Great
4           30      F    Nice
Encoded Employee data : 
   Employee id  Gender_F  Gender_M  Remarks_Good  Remarks_Great  Remarks_Nice
0           10       0.0       1.0           1.0            0.0           0.0
1           20       1.0       0.0           0.0            0.0           1.0
2           15       1.0       0.0           1.0            0.0           0.0
3           25       0.0       1.0           0.0            1.0           0.0
4           30       1.0       0.0           0.0            0.0           1.0


In [11]:
# import required module
from sklearn.feature_extraction.text import TfidfVectorizer


In [12]:
# assign documents
d0 = 'Geeks for geeks'
d1 = 'Geeks'
d2 = 'r2j'

# merge documents into a single corpus
string = [d0, d1, d2]

In [13]:
# create object
tfidf = TfidfVectorizer()

# get tf-df values
result = tfidf.fit_transform(string)

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer

corpus = ["This is an example.", "We are learning TF-IDF.", "TF-IDF is useful."]
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(corpus)

print('\nidf values:')
for ele1, ele2 in zip(tfidf.get_feature_names_out(), tfidf.idf_):
    print(ele1, ':', ele2)



idf values:
an : 1.6931471805599454
are : 1.6931471805599454
example : 1.6931471805599454
idf : 1.2876820724517808
is : 1.2876820724517808
learning : 1.6931471805599454
tf : 1.2876820724517808
this : 1.6931471805599454
useful : 1.6931471805599454
we : 1.6931471805599454


In [16]:
# get indexing
print('\nWord indexes:')
print(tfidf.vocabulary_)

# display tf-idf values
print('\ntf-idf value:')
print(result)

# in matrix form
print('\ntf-idf values in matrix form:')
print(result.toarray())


Word indexes:
{'this': 7, 'is': 4, 'an': 0, 'example': 2, 'we': 9, 'are': 1, 'learning': 5, 'tf': 6, 'idf': 3, 'useful': 8}

tf-idf value:
  (0, 1)	0.8355915419449176
  (0, 0)	0.5493512310263033
  (1, 1)	1.0
  (2, 2)	1.0

tf-idf values in matrix form:
[[0.54935123 0.83559154 0.        ]
 [0.         1.         0.        ]
 [0.         0.         1.        ]]


In [17]:
# import required module
from sklearn.feature_extraction.text import TfidfVectorizer

# assign corpus
string = ['Geeks geeks']*5

# create object
tfidf = TfidfVectorizer()

# get tf-df values
result = tfidf.fit_transform(string)

# get indexing
print('\nWord indexes:')
print(tfidf.vocabulary_)

# display tf-idf values
print('\ntf-idf values:')
print(result)


Word indexes:
{'geeks': 0}

tf-idf values:
  (0, 0)	1.0
  (1, 0)	1.0
  (2, 0)	1.0
  (3, 0)	1.0
  (4, 0)	1.0
