-
Notifications
You must be signed in to change notification settings - Fork 380
feat: add position details to linter #4663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| from pathlib import Path | ||
|
|
||
| from sqlmesh.core.linter.rule import Position, Range | ||
| from sqlmesh.utils.pydantic import PydanticModel | ||
| import typing as t | ||
|
|
||
|
|
||
| class TokenPositionDetails(PydanticModel): | ||
| """ | ||
| Details about a token's position in the source code in the structure provided by SQLGlot. | ||
|
|
||
| Attributes: | ||
| line (int): The line that the token ends on. | ||
| col (int): The column that the token ends on. | ||
| start (int): The start index of the token. | ||
| end (int): The ending index of the token. | ||
| """ | ||
|
|
||
| line: int | ||
| col: int | ||
| start: int | ||
| end: int | ||
|
|
||
| @staticmethod | ||
| def from_meta(meta: t.Dict[str, int]) -> "TokenPositionDetails": | ||
| return TokenPositionDetails( | ||
| line=meta["line"], | ||
| col=meta["col"], | ||
| start=meta["start"], | ||
| end=meta["end"], | ||
| ) | ||
|
|
||
| def to_range(self, read_file: t.Optional[t.List[str]]) -> Range: | ||
| """ | ||
| Convert a TokenPositionDetails object to a Range object. | ||
|
|
||
| In the circumstances where the token's start and end positions are the same, | ||
| there is no need for a read_file parameter, as the range can be derived from the token's | ||
| line and column. This is an optimization to avoid unnecessary file reads and should | ||
| only be used when the token represents a single character or position in the file. | ||
|
|
||
| If the token's start and end positions are different, the read_file parameter is required. | ||
|
|
||
| :param read_file: List of lines from the file. Optional | ||
| :return: A Range object representing the token's position | ||
| """ | ||
| if self.start == self.end: | ||
| # If the start and end positions are the same, we can create a range directly | ||
| return Range( | ||
| start=Position(line=self.line - 1, character=self.col - 1), | ||
| end=Position(line=self.line - 1, character=self.col), | ||
| ) | ||
|
|
||
| if read_file is None: | ||
| raise ValueError("read_file must be provided when start and end positions differ.") | ||
|
|
||
| # Convert from 1-indexed to 0-indexed for line only | ||
| end_line_0 = self.line - 1 | ||
| end_col_0 = self.col | ||
|
|
||
| # Find the start line and column by counting backwards from the end position | ||
| start_pos = self.start | ||
| end_pos = self.end | ||
|
|
||
| # Initialize with the end position | ||
| start_line_0 = end_line_0 | ||
| start_col_0 = end_col_0 - (end_pos - start_pos + 1) | ||
|
|
||
| # If start_col_0 is negative, we need to go back to previous lines | ||
| while start_col_0 < 0 and start_line_0 > 0: | ||
| start_line_0 -= 1 | ||
| start_col_0 += len(read_file[start_line_0]) | ||
| # Account for newline character | ||
| if start_col_0 >= 0: | ||
| break | ||
| start_col_0 += 1 # For the newline character | ||
|
|
||
| # Ensure we don't have negative values | ||
| start_col_0 = max(0, start_col_0) | ||
| return Range( | ||
| start=Position(line=start_line_0, character=start_col_0), | ||
| end=Position(line=end_line_0, character=end_col_0), | ||
| ) | ||
|
|
||
|
|
||
| def read_range_from_file(file: Path, text_range: Range) -> str: | ||
| """ | ||
| Read the file and return the content within the specified range. | ||
|
|
||
| Args: | ||
| file: Path to the file to read | ||
| text_range: The range of text to extract | ||
|
|
||
| Returns: | ||
| The content within the specified range | ||
| """ | ||
| with file.open("r", encoding="utf-8") as f: | ||
| lines = f.readlines() | ||
|
|
||
| # Ensure the range is within bounds | ||
| start_line = max(0, text_range.start.line) | ||
| end_line = min(len(lines), text_range.end.line + 1) | ||
|
|
||
| if start_line >= end_line: | ||
| return "" | ||
|
|
||
| # Extract the relevant portions of each line | ||
| result = [] | ||
| for i in range(start_line, end_line): | ||
| line = lines[i] | ||
| start_char = text_range.start.character if i == text_range.start.line else 0 | ||
| end_char = text_range.end.character if i == text_range.end.line else len(line) | ||
| result.append(line[start_char:end_char]) | ||
|
|
||
| return "".join(result) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,9 +4,11 @@ | |
|
|
||
| import typing as t | ||
|
|
||
| from sqlglot.expressions import Star | ||
| from sqlglot.helper import subclasses | ||
|
|
||
| from sqlmesh.core.linter.rule import Rule, RuleViolation | ||
| from sqlmesh.core.linter.helpers import TokenPositionDetails | ||
| from sqlmesh.core.linter.rule import Rule, RuleViolation, Range | ||
| from sqlmesh.core.linter.definition import RuleSet | ||
| from sqlmesh.core.model import Model, SqlModel | ||
|
|
||
|
|
@@ -15,10 +17,25 @@ class NoSelectStar(Rule): | |
| """Query should not contain SELECT * on its outer most projections, even if it can be expanded.""" | ||
|
|
||
| def check_model(self, model: Model) -> t.Optional[RuleViolation]: | ||
| # Only applies to SQL models, as other model types do not have a query. | ||
| if not isinstance(model, SqlModel): | ||
| return None | ||
|
|
||
| return self.violation() if model.query.is_star else None | ||
| if model.query.is_star: | ||
| violation_range = self._get_range(model) | ||
| return self.violation(violation_range=violation_range) | ||
| return None | ||
|
|
||
| def _get_range(self, model: SqlModel) -> t.Optional[Range]: | ||
| """Get the range of the violation if available.""" | ||
| try: | ||
| if len(model.query.expressions) == 1 and isinstance(model.query.expressions[0], Star): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this instead call
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this can be improved, but I'm hesitant to do so right away because it adds complexity. For me, I'm just in the simplest case of returning a range, which I think we can expand on over time. There's also a lot of other changes so rather do this later. |
||
| return TokenPositionDetails.from_meta(model.query.expressions[0].meta).to_range( | ||
| None | ||
| ) | ||
| except Exception: | ||
| pass | ||
|
|
||
| return None | ||
|
|
||
|
|
||
| class InvalidSelectStarExpansion(Rule): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does "character" mean column offset in the line? Why not use
columnor something similar so that the naming is consistent?I also suggest we start documenting some details, like whether it's 1-based indexing and stuff, so reading the docstring makes it obvious to understand what's going on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
idem, just want to specify LSP standard