In [15]:
import warnings
warnings.filterwarnings('ignore')

## ML

In [16]:
import pandas as pd
import statsmodels.api as sm
from sklearn.datasets import fetch_california_housing

# Fetch the California housing data as pandas objects: a feature frame and
# the median-house-value target series.
features, y = fetch_california_housing(return_X_y=True, as_frame=True)

# statsmodels does not add an intercept on its own, so prepend a constant
# column to the design matrix.
X = sm.add_constant(features)

# Specify the ordinary-least-squares model, then estimate it.
ols_spec = sm.OLS(y, X)
model = ols_spec.fit()

# Show the full regression report (coefficients, fit statistics, diagnostics).
report = model.summary()
print(report)

                            OLS Regression Results                            
Dep. Variable:            MedHouseVal   R-squared:                       0.606
Model:                            OLS   Adj. R-squared:                  0.606
Method:                 Least Squares   F-statistic:                     3970.
Date:                Mon, 06 Feb 2023   Prob (F-statistic):               0.00
Time:                        21:08:49   Log-Likelihood:                -22624.
No. Observations:               20640   AIC:                         4.527e+04
Df Residuals:                   20631   BIC:                         4.534e+04
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -36.9419      0.659    -56.067      0.0

Below is a more detailed explanation of the fit statistics reported in the summary above:
- R-squared: The R-squared value is 0.606, which means that 60.6% of the variance in the target variable is explained by the independent variables in the model.
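- Adj. R-squared: The Adj. R-squared value is also 0.606. Adjusted R-squared corrects R-squared for the number of independent variables, so it only increases when an added variable improves the model more than would be expected by chance. It matches R-squared here because, with 20,640 observations and only 8 independent variables, the correction is negligible.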
- F-statistic: The F-statistic value is 3970, and the probability (F-statistic) is 0.00, so we reject the null hypothesis that all slope coefficients are zero. In other words, the independent variables are jointly useful in explaining the variation in the target variable.
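- Log-Likelihood: The log-likelihood value is -22624. On its own this number says little about the quality of the fit; it is mainly useful for comparing models fitted to the same data (a higher log-likelihood means a better fit), and it is the quantity from which AIC and BIC are computed.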
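- AIC: The Akaike Information Criterion (AIC) value is 4.527e+04. AIC balances goodness of fit against the number of parameters; lower values indicate a better trade-off between fit and complexity, but the value is only meaningful when compared with the AIC of other models fitted to the same data.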
- BIC: The Bayesian Information Criterion (BIC) is similar to the AIC but penalizes models with many parameters more strongly. Lower BIC values indicate a better trade-off between fit and complexity. In this case, the BIC value is 4.534e+04, which, like the AIC, is best interpreted by comparing it with the BIC of alternative models fitted to the same data.

In [18]:
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Fetch the California Housing bunch and assemble one labelled frame holding
# the feature columns plus a trailing 'target' column.
cal_housing = fetch_california_housing()
df = pd.DataFrame(cal_housing.data, columns=cal_housing.feature_names).assign(
    target=cal_housing.target
)

# Separate the response from the predictors
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit ordinary least squares on the training rows (fit returns the estimator)
regressor = LinearRegression().fit(X_train, y_train)

# Score the held-out rows
y_pred = regressor.predict(X_test)

# Error and goodness-of-fit metrics on the test set
mse = mean_squared_error(y_test, y_pred)
rmse = mse**0.5
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Report each metric on its own line
for label, value in (
    ("Mean Squared Error", mse),
    ("Root Mean Squared Error", rmse),
    ("R-squared", r2),
    ("Mean Absolute Error", mae),
):
    print(f"{label}: {value}")

Mean Squared Error: 0.5289841670367209
Root Mean Squared Error: 0.7273129773603114
R-squared: 0.5943232652466202
Mean Absolute Error: 0.535126133655451


## Python

In [19]:
def threeSum(nums):
    """Return every unique triplet of values from ``nums`` that sums to zero.

    The search runs on a sorted copy, so the caller's sequence is left in
    its original order and any iterable of numbers (list, tuple, ...) is
    accepted.

    Args:
        nums: Iterable of numbers.

    Returns:
        A list of ``[a, b, c]`` lists with ``a <= b <= c`` and
        ``a + b + c == 0``. Triplets appear in ascending order of ``a`` and
        then ``b``, and no triplet is repeated.
    """
    ordered = sorted(nums)
    size, result = len(ordered), []
    # The last two positions can never start a triplet.
    for i in range(size - 2):
        first = ordered[i]
        # Once the smallest member is positive, no later triplet can sum to 0.
        if first > 0:
            break
        # Skip repeated anchors so each triplet is reported once.
        if i > 0 and first == ordered[i - 1]:
            continue
        target = -first
        lo, hi = i + 1, size - 1
        # Two-pointer sweep over the remainder for a pair summing to target.
        while lo < hi:
            pair_sum = ordered[lo] + ordered[hi]
            if pair_sum == target:
                result.append([first, ordered[lo], ordered[hi]])
                lo += 1
                # Step past duplicates of the value just used on the left.
                while lo < hi and ordered[lo] == ordered[lo - 1]:
                    lo += 1
            elif pair_sum < target:
                lo += 1
            else:
                hi -= 1
    return result

# Example input; the bare call on the last line makes the notebook display
# the returned list of zero-sum triplets.
nums = [-1,0,1,2,-1,-4]
threeSum(nums)

[[-1, -1, 2], [-1, 0, 1]]

In [22]:
def lengthOfLongestSubstring(s: str) -> int:
    """Return the length of the longest run of ``s`` with no repeated character.

    Uses a sliding window: ``window_start`` is the left edge of the current
    repeat-free window and ``last_index`` remembers where each character was
    most recently seen.
    """
    last_index = {}
    window_start = 0
    best = 0
    for pos, ch in enumerate(s):
        prev = last_index.get(ch)
        if prev is not None and prev >= window_start:
            # ch already occurs inside the window: move the left edge just
            # past that earlier occurrence.
            window_start = prev + 1
        else:
            # ch is new to the window, so the window grew by one character.
            best = max(best, pos - window_start + 1)
        last_index[ch] = pos
    return best

# Example input: the longest repeat-free substring is "abc", so the cell
# displays 3.
s = "abcabcbb"
lengthOfLongestSubstring(s)

3

In [9]:
import pandas as pd
import numpy as np


def set_zeroes(df):
    """Zero out, in place, every row and column of ``df`` that contains a 0.

    Zero positions are taken from the frame as it is on entry, so the zeros
    written by this function do not trigger further rows or columns.
    Rows and columns are addressed by position, not by label.

    Args:
        df: pandas DataFrame to modify in place.

    Returns:
        None. ``df`` itself is updated.
    """
    # Boolean grid of the original zero positions, computed in one
    # vectorised pass instead of a Python loop over every cell.
    is_zero = (df == 0).to_numpy()
    if not is_zero.any():
        # Nothing to do (this also covers an empty frame).
        return

    rows_with_zero = is_zero.any(axis=1)
    cols_with_zero = is_zero.any(axis=0)

    df.iloc[rows_with_zero, :] = 0
    df.iloc[:, cols_with_zero] = 0

# Seeded generator so the demo matrix is the same on every Restart & Run All.
rng = np.random.default_rng(0)

# 5x5 grid of values in 1..9 (upper bound exclusive), so the only zero is the
# one planted in the centre cell below.
df = pd.DataFrame(rng.integers(low=1, high=10, size=(5, 5)))
df.iloc[2, 2] = 0

print("before")
print(df)

# set_zeroes is called for its effect on df, which is then printed again.
print("\nafter")
set_zeroes(df)
print(df)

before
   0  1  2  3  4
0  1  1  4  7  8
1  2  5  8  8  7
2  9  7  0  4  9
3  8  3  4  1  3
4  9  6  5  7  6

after
   0  1  2  3  4
0  1  1  0  7  8
1  2  5  0  8  7
2  0  0  0  0  0
3  8  3  0  1  3
4  9  6  0  7  6


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=a605a3e6-1564-47b2-94e7-842290ba7692' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>